#library(dtplyr)
library(data.table)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
data.table 1.14.2 using 6 threads (see ?getDTthreads). Latest news: r-datatable.com
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.5 ✓ purrr 0.3.4
✓ tibble 3.1.6 ✓ dplyr 1.0.8
✓ tidyr 1.2.0 ✓ stringr 1.4.0
✓ readr 2.1.2 ✓ forcats 0.5.1
Warning: package ‘tidyr’ was built under R version 4.1.2
Warning: package ‘readr’ was built under R version 4.1.2
Warning: package ‘dplyr’ was built under R version 4.1.2
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::between() masks data.table::between()
x dplyr::filter() masks stats::filter()
x dplyr::first() masks data.table::first()
x dplyr::lag() masks stats::lag()
x dplyr::last() masks data.table::last()
x purrr::transpose() masks data.table::transpose()
library(skimr)
library(openxlsx)
library(ggrepel)
library(dplyr)
library(tidyr)
library(data.table)
library(broom)
Warning: package ‘broom’ was built under R version 4.1.2
library(broomExtra)
Warning: package ‘broomExtra’ was built under R version 4.1.2
Registered S3 method overwritten by 'parameters':
method from
format.parameters_distribution datawizard
Attaching package: ‘broomExtra’
The following objects are masked from ‘package:broom’:
augment, glance, tidy
library(tibble)
library(sjstats)
Attaching package: ‘sjstats’
The following object is masked from ‘package:broom’:
bootstrap
library(car)
Loading required package: carData
Warning: package ‘carData’ was built under R version 4.1.2
Attaching package: ‘car’
The following object is masked from ‘package:dplyr’:
recode
The following object is masked from ‘package:purrr’:
some
#library(lme4)
#library(lmerTest)
library(ggplot2)
library(tibble)
library(modelr)
Attaching package: ‘modelr’
The following objects are masked from ‘package:sjstats’:
bootstrap, mse, rmse
The following object is masked from ‘package:broom’:
bootstrap
library(tidyverse)
#library(miceadds)
library(ggforce)
require(openxlsx)
library(tidyverse)
library(caret)
Warning: package ‘caret’ was built under R version 4.1.2
Loading required package: lattice
Attaching package: ‘caret’
The following object is masked from ‘package:purrr’:
lift
#library(glmnet)
library(ggplot2)
library(gridExtra)
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
library(MASS) # rlm
Warning: package ‘MASS’ was built under R version 4.1.2
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
library(lmPerm)
Attaching package: ‘lmPerm’
The following object is masked from ‘package:modelr’:
permute
library(circlize)
Warning: package ‘circlize’ was built under R version 4.1.2
========================================
circlize version 0.4.14
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/
If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
in R. Bioinformatics 2014.
This message can be suppressed by:
suppressPackageStartupMessages(library(circlize))
========================================
library(RColorBrewer)
Warning: package ‘RColorBrewer’ was built under R version 4.1.2
library(ComplexHeatmap)
Loading required package: grid
========================================
ComplexHeatmap version 2.8.0
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
If you use it in published research, please cite:
Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
genomic data. Bioinformatics 2016.
The new InteractiveComplexHeatmap package can directly export static
complex heatmaps into an interactive Shiny app with zero effort. Have a try!
This message can be suppressed by:
suppressPackageStartupMessages(library(ComplexHeatmap))
========================================
select <- dplyr::select
filter <- dplyr::filter
`%notin%` <- Negate(`%in%`)
save.SessionInfo <- sessionInfo()
save.SessionInfo
R version 4.1.1 (2021-08-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] ComplexHeatmap_2.8.0 RColorBrewer_1.1-3 circlize_0.4.14 lmPerm_2.1.0 MASS_7.3-56
[6] gridExtra_2.3 glmnet_4.1-3 caret_6.0-91 lattice_0.20-45 ggforce_0.3.3
[11] forcats_0.5.1 stringr_1.4.0 purrr_0.3.4 readr_2.1.2 tidyverse_1.3.1
[16] modelr_0.1.8 lmerTest_3.1-3 lme4_1.1-29 Matrix_1.4-1 car_3.0-12
[21] carData_3.0-5 sjstats_0.18.1 tibble_3.1.6 broomExtra_4.3.2 broom_0.7.12
[26] data.table_1.14.2 tidyr_1.2.0 dplyr_1.0.8 ggrepel_0.9.1 ggplot2_3.3.5
[31] openxlsx_4.2.5 skimr_2.1.3
loaded via a namespace (and not attached):
[1] readxl_1.4.0 backports_1.4.1 plyr_1.8.7 repr_1.1.4 splines_4.1.1
[6] listenv_0.8.0 TH.data_1.1-0 digest_0.6.29 foreach_1.5.2 htmltools_0.5.2
[11] fansi_1.0.3 magrittr_2.0.3 cluster_2.1.3 doParallel_1.0.17 tzdb_0.3.0
[16] recipes_0.2.0 globals_0.14.0 gower_1.0.0 matrixStats_0.61.0 sandwich_3.0-1
[21] hardhat_0.2.0 colorspace_2.0-3 rvest_1.0.2 haven_2.4.3 xfun_0.30
[26] crayon_1.5.1 jsonlite_1.8.0 survival_3.3-1 zoo_1.8-9 iterators_1.0.14
[31] glue_1.6.2 polyclip_1.10-0 gtable_0.3.0 ipred_0.9-12 emmeans_1.7.3
[36] GetoptLong_1.0.5 sjmisc_2.8.9 future.apply_1.8.1 shape_1.4.6 BiocGenerics_0.38.0
[41] abind_1.4-5 scales_1.2.0 mvtnorm_1.1-3 DBI_1.1.2 Rcpp_1.0.8.3
[46] xtable_1.8-4 performance_0.9.0 clue_0.3-60 bit_4.0.4 stats4_4.1.1
[51] lava_1.6.10 prodlim_2019.11.13 datawizard_0.4.0 httr_1.4.2 ellipsis_0.3.2
[56] pkgconfig_2.0.3 farver_2.1.0 sass_0.4.1 nnet_7.3-17 dbplyr_2.1.1
[61] utf8_1.2.2 tidyselect_1.1.2 rlang_1.0.2 reshape2_1.4.4 effectsize_0.6.0.1
[66] munsell_0.5.0 cellranger_1.1.0 tools_4.1.1 cli_3.2.0 generics_0.1.2
[71] sjlabelled_1.1.8 evaluate_0.15 fastmap_1.1.0 yaml_2.3.5 bit64_4.0.5
[76] ModelMetrics_1.2.2.2 knitr_1.38 fs_1.5.2 zip_2.2.0 future_1.24.0
[81] nlme_3.1-157 xml2_1.3.3 compiler_4.1.1 rstudioapi_0.13 png_0.1-7
[86] reprex_2.0.1 tweenr_1.0.2 bslib_0.3.1 stringi_1.7.6 parameters_0.17.0
[91] nloptr_2.0.0 vctrs_0.4.0 pillar_1.7.0 lifecycle_1.0.1 jquerylib_0.1.4
[96] GlobalOptions_0.1.2 estimability_1.3 insight_0.17.0 R6_2.5.1 IRanges_2.26.0
[101] parallelly_1.30.0 codetools_0.2-18 boot_1.3-28 assertthat_0.2.1 rjson_0.2.21
[106] withr_2.5.0 S4Vectors_0.30.2 multcomp_1.4-18 bayestestR_0.11.5 parallel_4.1.1
[111] hms_1.1.1 rpart_4.1.16 timeDate_3043.102 coda_0.19-4 class_7.3-20
[116] minqa_1.2.4 rmarkdown_2.13 Cairo_1.5-15 pROC_1.18.0 numDeriv_2016.8-1.1
[121] lubridate_1.8.0 base64enc_0.1-3
#setwd('/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN')
dir.ImputationResultsRAW <- "/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results/Results"
dir.ImputationResultsCLEAN <- "/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN"
dir.ImputationResults.QCstats <- "/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results/QC_Statistics"
dir.Data <- "/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data"
dir.Plots <- "/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Plots"
dir.GRSoriginal.Anno <- "/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSoriginal"
dir.GRSrevised.Anno <- "/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised"
#cd '/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN'
#cp -a 'MEGA_041122_Espinosa08132019QCpass_IMPUTEDwithPhasing_ref1000G3v5_autosomesNoChr21_v0.1_JRS.vcf.gz' '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data/'
#cp -a 'MEGA_041122_Espinosa08132019QCpass_IMPUTEDwithPhasing_ref1000G3v5_autosomesNoChr21_v0.1_JRS.vcf.gz.csi' '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data/'
#cp -a 'autosomesNoChr21.info.gz' '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data/'
cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data/'
zcat < 'MEGA_041122_Espinosa08132019QCpass_IMPUTEDwithPhasing_ref1000G3v5_autosomesNoChr21_v0.1_JRS.vcf.gz' | head -n 44
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##filedate=2022.4.12
##contig=<ID=1>
##INFO=<ID=AF,Number=1,Type=Float,Description="Estimated Alternate Allele Frequency">
##INFO=<ID=MAF,Number=1,Type=Float,Description="Estimated Minor Allele Frequency">
##INFO=<ID=R2,Number=1,Type=Float,Description="Estimated Imputation Accuracy (R-square)">
##INFO=<ID=ER2,Number=1,Type=Float,Description="Empirical (Leave-One-Out) R-square (available only for genotyped variants)">
##INFO=<ID=IMPUTED,Number=0,Type=Flag,Description="Marker was imputed but NOT genotyped">
##INFO=<ID=TYPED,Number=0,Type=Flag,Description="Marker was genotyped AND imputed">
##INFO=<ID=TYPED_ONLY,Number=0,Type=Flag,Description="Marker was genotyped but NOT imputed">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DS,Number=1,Type=Float,Description="Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]">
##FORMAT=<ID=HDS,Number=2,Type=Float,Description="Estimated Haploid Alternate Allele Dosage">
##FORMAT=<ID=GP,Number=3,Type=Float,Description="Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1">
##pipeline=michigan-imputationserver-1.6.6
##imputation=minimac4-1.0.2
##phasing=eagle-2.4
##panel=apps@1000g-phase-3-v5
##r2Filter=0.0
##contig=<ID=2>
##contig=<ID=3>
##contig=<ID=4>
##contig=<ID=5>
##contig=<ID=6>
##contig=<ID=7>
##contig=<ID=8>
##contig=<ID=9>
##contig=<ID=10>
##contig=<ID=11>
##contig=<ID=12>
##contig=<ID=13>
##contig=<ID=14>
##contig=<ID=15>
##contig=<ID=16>
##contig=<ID=17>
##contig=<ID=18>
##contig=<ID=19>
##contig=<ID=20>
##contig=<ID=22>
##bcftools_concatVersion=1.14+htslib-1.14
##bcftools_concatCommand=concat -Oz -o /Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN/MEGA_041122_Espinosa08132019QCpass_IMPUTEDwithPhasing_ref1000G3v5_autosomesNoChr21_v0.1_JRS.vcf.gz chr1.dose.vcf.gz chr2.dose.vcf.gz chr3.dose.vcf.gz chr4.dose.vcf.gz chr5.dose.vcf.gz chr6.dose.vcf.gz chr7.dose.vcf.gz chr8.dose.vcf.gz chr9.dose.vcf.gz chr10.dose.vcf.gz chr11.dose.vcf.gz chr12.dose.vcf.gz chr13.dose.vcf.gz chr14.dose.vcf.gz chr15.dose.vcf.gz chr16.dose.vcf.gz chr17.dose.vcf.gz chr18.dose.vcf.gz chr19.dose.vcf.gz chr20.dose.vcf.gz chr22.dose.vcf.gz; Date=Wed Apr 13 15:23:47 2022
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_WG1-DNA_A01_NA19235 2_WG1-DNA_B01_HTP0207A 3_WG1-DNA_C01_HTP0207A 4_WG1-DNA_D01_HTP0222A 5_WG1-DNA_E01_HTP0442A 6_WG1-DNA_F01_HTP0219A 7_WG1-DNA_G01_HTP0465A 8_WG1-DNA_H01_HTP0119A 9_WG1-DNA_B02_HTP0223A 10_WG1-DNA_C02_HTP0471A 11_WG1-DNA_D02_HTP0213A 12_WG1-DNA_E02_HTP0220A 13_WG1-DNA_F02_HTP0224A 14_WG1-DNA_G02_HTP0123A 15_WG1-DNA_H02_HTP0151A 16_WG1-DNA_A03_HTP0215A 17_WG1-DNA_B03_HTP0265A 18_WG1-DNA_C03_HTP0129A 19_WG1-DNA_D03_HTP0198A 20_WG1-DNA_E03_HTP0221A 21_WG1-DNA_F03_HTP0204A 22_WG1-DNA_G03_HTP0217A 23_WG1-DNA_H03_HTP0415A 24_WG1-DNA_A04_HTP0139A 25_WG1-DNA_B04_HTP0218A 26_WG1-DNA_C04_HTP0254A 27_WG1-DNA_D04_HTP0232A 28_WG1-DNA_E04_HTP0238A 29_WG1-DNA_F04_HTP0241A 30_WG1-DNA_G04_HTP0249A 31_WG1-DNA_H04_HTP0258A 32_WG1-DNA_A05_HTP0261A 33_WG1-DNA_B05_HTP0266A 34_WG1-DNA_C05_HTP0226A 35_WG1-DNA_D05_HTP0250A 36_WG1-DNA_E05_HTP0227A 37_WG1-DNA_F05_HTP0233A 38_WG1-DNA_G05_HTP0243A 39_WG1-DNA_H05_HTP0262A 40_WG1-DNA_A06_NA19236 41_WG1-DNA_B06_HTP0267A 42_WG1-DNA_C06_HTP0267A 43_WG1-DNA_D06_HTP0228A 44_WG1-DNA_E06_HTP0251A 45_WG1-DNA_F06_HTP0256A 46_WG1-DNA_G06_HTP0259A 47_WG1-DNA_H06_HTP0234A 48_WG1-DNA_A07_HTP0263A 49_WG1-DNA_B07_HTP0268A 50_WG1-DNA_C07_HTP0229A 51_WG1-DNA_D07_HTP0239A 52_WG1-DNA_E07_HTP0245A 53_WG1-DNA_F07_HTP0253A 54_WG1-DNA_G07_HTP0235A 55_WG1-DNA_H07_HTP0257A 56_WG1-DNA_A08_HTP0260A 57_WG1-DNA_B08_HTP0230A 58_WG1-DNA_C08_HTP0236A 59_WG1-DNA_D08_HTP0240A 60_WG1-DNA_E08_HTP0247A 61_WG1-DNA_F08_HTP0264A 62_WG1-DNA_G08_HTP0269A 63_WG1-DNA_H08_HTP0306A 64_WG1-DNA_A09_HTP0318A 65_WG1-DNA_B09_HTP0084A2 66_WG1-DNA_C09_HTP0270A 67_WG1-DNA_D09_HTP0275A 68_WG1-DNA_E09_HTP0293A 69_WG1-DNA_F09_HTP0300A 70_WG1-DNA_G09_HTP0328A 71_WG1-DNA_H09_HTP0337A 72_WG1-DNA_A10_HTP0310A 73_WG1-DNA_B10_HTP0319A 74_WG1-DNA_C10_HTP0345A 75_WG1-DNA_D10_HTP0272A 76_WG1-DNA_E10_HTP0276A 77_WG1-DNA_F10_HTP0295A 78_WG1-DNA_G10_HTP0301A 79_WG1-DNA_H10_HTP0331A 80_WG1-DNA_A11_HTP0012A2 81_WG1-DNA_B11_HTP0320A 82_WG1-DNA_C11_HTP0303A 83_WG1-DNA_D11_HTP0315A 84_WG1-DNA_E11_HTP0332A 85_WG1-DNA_F11_HTP0273A 86_WG1-DNA_G11_HTP0277A 87_WG1-DNA_H11_HTP0296A 88_WG1-DNA_A12_NA19237 89_WG1-DNA_B12_HTP0334A 90_WG1-DNA_C12_HTP0023A2 91_WG1-DNA_D12_HTP0274A 92_WG1-DNA_E12_HTP0326A 93_WG1-DNA_F12_HTP0335A 94_WG1-DNA_G12_HTP0348A 95_WG1-DNA_H12_HTP0080A2 96_WG2-DNA_B01_HTP0036A2 97_WG2-DNA_C01_HTP0036A2 98_WG2-DNA_D01_HTP0394A 99_WG2-DNA_E01_HTP0350A 100_WG2-DNA_F01_HTP0382A 101_WG2-DNA_G01_HTP0391A 102_WG2-DNA_H01_HTP0398A 103_WG2-DNA_C02_HTP0378A 104_WG2-DNA_D02_HTP0387A 105_WG2-DNA_E02_HTP0395A 106_WG2-DNA_F02_HTP0351A 107_WG2-DNA_G02_HTP0388A 108_WG2-DNA_C03_HTP0384A 109_WG2-DNA_D03_HTP0389A 110_WG2-DNA_E03_HTP0392A 111_WG2-DNA_F03_HTP0052A2 112_WG2-DNA_G03_HTP0373A 113_WG2-DNA_H03_HTP0396A 114_WG2-DNA_B04_HTP0050A2 115_WG2-DNA_C04_HTP0380A 116_WG2-DNA_D04_HTP0385A 117_WG2-DNA_E04_HTP0053A2 118_WG2-DNA_F04_HTP0374A 119_WG2-DNA_G04_HTP0381A 120_WG2-DNA_A05_HTP0393A 121_WG2-DNA_C05_HTP0375A 122_WG2-DNA_D05_HTP0386A 123_WG2-DNA_E05_HTP0397A 124_WG2-DNA_F05_HTP0406A 125_WG2-DNA_G05_HTP0412A 126_WG2-DNA_H05_HTP0422A 127_WG2-DNA_D06_HTP0402A 128_WG2-DNA_E06_HTP0426A 129_WG2-DNA_F06_HTP0435A 130_WG2-DNA_G06_HTP0403A 131_WG2-DNA_H06_HTP0413A 132_WG2-DNA_A07_HTP0419A 133_WG2-DNA_B07_HTP0423A 134_WG2-DNA_C07_HTP0407A 135_WG2-DNA_D07_HTP0410A 136_WG2-DNA_E07_HTP0433A 137_WG2-DNA_F07_HTP0404A 138_WG2-DNA_G07_HTP0408A 139_WG2-DNA_H07_HTP0428A 140_WG2-DNA_A08_HTP0436A 141_WG2-DNA_B08_HTP0414A 142_WG2-DNA_C08_HTP0420A 143_WG2-DNA_D08_HTP0424A 144_WG2-DNA_E08_HTP0411A 145_WG2-DNA_F08_HTP0430A 146_WG2-DNA_G08_HTP0437A 147_WG2-DNA_H08_HTP0405A 148_WG2-DNA_A09_HTP0409A 149_WG2-DNA_B09_HTP0416A 150_WG2-DNA_C09_HTP0421A 151_WG2-DNA_D09_HTP0425A 152_WG2-DNA_E09_HTP0434A 153_WG2-DNA_F09_HTP0418A 154_WG2-DNA_G09_HTP0443A 155_WG2-DNA_H09_HTP0450A 156_WG2-DNA_D10_HTP0469A 157_WG2-DNA_E10_HTP0456A 158_WG2-DNA_F10_HTP0475A 159_WG2-DNA_G10_HTP0480A 160_WG2-DNA_H10_HTP0438A 161_WG2-DNA_A11_HTP0445A 162_WG2-DNA_B11_HTP0451A 163_WG2-DNA_C11_HTP0461A 164_WG2-DNA_D11_HTP0467A 165_WG2-DNA_E11_HTP0476A 166_WG2-DNA_F11_HTP0439A 167_WG2-DNA_G11_HTP0448A 168_WG2-DNA_H11_HTP0458A 169_WG2-DNA_B12_HTP0482A 170_WG2-DNA_C12_HTP0452A 171_WG2-DNA_D12_HTP0462A 172_WG2-DNA_E12_HTP0478A 173_WG2-DNA_F12_HTP0459A 174_WG2-DNA_G12_HTP0468A 175_WG2-DNA_H12_HTP0484A 176_WG3-DNA_A01_NA19235 177_WG3-DNA_B01_HTP0440A 178_WG3-DNA_C01_HTP0440A 179_WG3-DNA_D01_HTP0449A 180_WG3-DNA_E01_HTP0463A 181_WG3-DNA_F01_HTP0479A 182_WG3-DNA_G01_HTP0453A 183_WG3-DNA_H01_HTP0515A 184_WG3-DNA_C02_HTP0519A 185_WG3-DNA_D02_HTP0005A3 186_WG3-DNA_E02_HTP0516A 187_WG3-DNA_F02_HTP0517A 188_WG3-DNA_G02_HTP0329A2 189_WG3-DNA_H02_HTP0015A3 190_WG3-DNA_C03_HTP0333A3 191_WG3-DNA_D03_HTP0112A2 192_WG3-DNA_E03_HTP0512A 193_WG3-DNA_F03_HTP0324A2 194_WG3-DNA_G03_HTP0340A2 195_WG3-DNA_H03_HTP0510A 198_WG3-DNA_F04_HTP0336A 199_WG3-DNA_G04_HTP0203A 200_WG3-DNA_H04_HTP0278A 201_WG3-DNA_A05_HTP0483A 202_WG3-DNA_B05_HTP0498B 203_WG3-DNA_C05_HTP0432A 204_WG3-DNA_D05_HTP0369A 205_WG3-DNA_E05_HTP0561A 206_WG3-DNA_F05_HTP0566A 207_WG3-DNA_G05_HTP0496B 208_WG3-DNA_H05_HTP0497B 209_WG4-DNA_D06_NA19235 210_WG4-DNA_E06_NA19237 211_WG5-DNA_F06_NA19174 212_WG5-DNA_G06_NA19117 214_WG6-DNA_B01_HTP0073A2 215_WG6-DNA_C01_HTP0054A2 216_WG6-DNA_D01_HTP0400A 217_WG6-DNA_E01_HTP0454A 218_WG6-DNA_F01_HTP0298A2 221_WG6-DNA_A02_HTP0431A 222_WG6-DNA_B02_HTP0460A 223_WG6-DNA_C02_HTP0464A 224_WG6-DNA_D02_HTP0399A 225_WG6-DNA_E02_HTP0390A 226_WG6-DNA_F02_HTP0017A4 227_WG6-DNA_G02_HTP0025A3 228_WG6-DNA_H02_HTP0514A 230_WG6-DNA_B03_HTP0066A2 231_WG6-DNA_C03_HTP0518A 232_WG6-DNA_D03_HTP0525A 233_WG6-DNA_E03_HTP0624A 234_WG6-DNA_F03_HTP0103A2 235_WG7-DNA_G03_NA19235 236_WG7-DNA_H03_NA19236 238_WG8-DNA_B04_NA18867 239_WG8-DNA_C04_NA18868 240_WG8-DNA_D04_NA18869 241_WG6-DNA_E04_NA19189 242_WG6-DNA_F04_NA19190 243_WG6-DNA_G04_NA19191 244_WG6-DNA_H04_HTP0431A
1 10177 1:10177:A:AC A AC . PASS AF=0.35936;MAF=0.35936;R2=0.08647;IMPUTED GT:DS:HDS:GP 1|0:0.975:0.524,0.452:0.261,0.502,0.237 0|0:0.765:0.318,0.447:0.377,0.481,0.142 0|0:0.765:0.447,0.318:0.377,0.481,0.142 1|0:0.941:0.529,0.412:0.277,0.505,0.218 0|0:0.749:0.267,0.482:0.380,0.492,0.129 0|0:0.903:0.421,0.482:0.300,0.497,0.203 0|1:1.164:0.416,0.747:0.147,0.541,0.311 0|0:0.506:0.395,0.112:0.538,0.418,0.044 1|0:1.079:0.632,0.448:0.203,0.514,0.283 0|0:0.518:0.341,0.177:0.543,0.397,0.060 0|0:0.857:0.463,0.394:0.325,0.492,0.182 0|0:0.639:0.265,0.373:0.460,0.441,0.099 1|0:1.190:0.719,0.470:0.149,0.513,0.338 0|0:0.547:0.270,0.276:0.528,0.397,0.075 0|0:0.709:0.378,0.330:0.416,0.459,0.125 0|0:0.643:0.231,0.412:0.452,0.452,0.095 0|0:0.631:0.294,0.337:0.468,0.433,0.099 0|0:0.516:0.209,0.307:0.548,0.387,0.064 0|0:0.687:0.357,0.330:0.431,0.451,0.118 0|0:0.772:0.369,0.403:0.377,0.475,0.149 0|1:0.821:0.227,0.594:0.314,0.551,0.135 0|0:0.637:0.231,0.406:0.457,0.449,0.094 0|0:0.603:0.351,0.252:0.486,0.426,0.088 0|0:0.455:0.160,0.295:0.592,0.361,0.047 0|0:0.592:0.191,0.401:0.485,0.439,0.077 0|0:0.821:0.425,0.396:0.347,0.484,0.168 0|0:0.406:0.254,0.151:0.633,0.329,0.038 0|0:0.686:0.422,0.263:0.425,0.463,0.111 0|0:0.718:0.390,0.329:0.410,0.462,0.128 0|0:0.393:0.194,0.199:0.646,0.316,0.039 0|0:0.559:0.391,0.168:0.507,0.427,0.066 0|0:0.825:0.419,0.406:0.345,0.485,0.170 0|0:0.521:0.133,0.389:0.530,0.418,0.052 0|0:0.515:0.311,0.204:0.549,0.388,0.063 0|0:0.585:0.227,0.358:0.497,0.422,0.081 0|0:0.741:0.408,0.333:0.395,0.469,0.136 1|0:1.023:0.638,0.385:0.223,0.532,0.246 0|0:0.581:0.174,0.407:0.490,0.439,0.071 0|0:0.534:0.139,0.395:0.521,0.424,0.055 1|0:0.840:0.727,0.114:0.242,0.675,0.082 0|0:0.625:0.336,0.289:0.472,0.431,0.097 0|0:0.625:0.289,0.336:0.472,0.431,0.097 1|0:1.033:0.554,0.479:0.232,0.502,0.265 0|0:0.665:0.444,0.221:0.433,0.469,0.098 0|0:0.765:0.418,0.347:0.380,0.475,0.145 0|0:0.628:0.324,0.304:0.471,0.431,0.098 0|0:0.877:0.462,0.415:0.315,0.493,0.192 0|0:0.755:0.414,0.341:0.386,0.473,0.141 0|1:0.882:0.230,0.652:0.268,0.582,0.150 0|0:0.723:0.301,0.422:0.404,0.469,0.127 0|0:0.819:0.414,0.405:0.349,0.484,0.168 1|0:0.698:0.519,0.179:0.395,0.512,0.093 0|0:0.642:0.302,0.339:0.461,0.436,0.103 0|0:0.458:0.127,0.331:0.584,0.374,0.042 0|0:0.506:0.295,0.211:0.556,0.382,0.062 0|0:0.408:0.172,0.235:0.633,0.327,0.041 0|0:0.719:0.302,0.417:0.407,0.467,0.126 0|1:0.910:0.397,0.514:0.293,0.503,0.204 0|0:0.639:0.231,0.409:0.455,0.451,0.094 0|0:0.661:0.249,0.411:0.442,0.456,0.103 0|0:0.612:0.317,0.296:0.481,0.425,0.094 0|0:0.270:0.099,0.171:0.747,0.236,0.017 1|0:0.960:0.630,0.329:0.248,0.544,0.208 0|0:0.748:0.389,0.359:0.392,0.469,0.140 0|0:0.860:0.485,0.375:0.322,0.496,0.182 0|0:0.771:0.405,0.367:0.377,0.475,0.148 0|0:0.538:0.273,0.265:0.535,0.393,0.072 1|0:1.040:0.723,0.317:0.189,0.582,0.229 1|0:0.851:0.501,0.351:0.324,0.500,0.175 0|0:0.588:0.164,0.424:0.482,0.449,0.070 0|0:0.777:0.406,0.372:0.373,0.476,0.151 0|0:0.460:0.162,0.298:0.589,0.363,0.048 0|0:0.655:0.404,0.251:0.446,0.452,0.102 0|0:0.560:0.353,0.207:0.513,0.414,0.073 0|0:0.586:0.335,0.251:0.498,0.418,0.084 0|0:0.879:0.477,0.402:0.312,0.496,0.192 0|0:0.669:0.432,0.236:0.433,0.464,0.102 0|1:0.958:0.254,0.705:0.220,0.601,0.179 0|0:0.664:0.393,0.271:0.442,0.451,0.107 0|0:0.938:0.462,0.476:0.282,0.498,0.220 0|0:0.849:0.493,0.356:0.326,0.498,0.176 0|0:0.435:0.163,0.272:0.610,0.346,0.044 0|0:0.541:0.275,0.266:0.532,0.394,0.073 0|0:0.718:0.404,0.314:0.409,0.464,0.127 0|0:0.699:0.374,0.325:0.423,0.456,0.122 0|0:0.676:0.423,0.254:0.431,0.462,0.107 0|0:0.669:0.432,0.236:0.433,0.464,0.102 1|1:1.535:0.719,0.816:0.052,0.362,0.586 0|0:0.649:0.359,0.291:0.455,0.441,0.104 1|0:0.780:0.616,0.164:0.321,0.578,0.101 0|0:0.474:0.258,0.216:0.582,0.362,0.056 0|0:0.693:0.395,0.298:0.425,0.458,0.118 0|0:0.570:0.253,0.317:0.510,0.409,0.080 0|0:0.617:0.301,0.316:0.478,0.427,0.095 0|0:0.657:0.327,0.330:0.451,0.441,0.108 0|0:0.741:0.361,0.380:0.396,0.467,0.137 0|0:0.741:0.361,0.380:0.396,0.467,0.137 0|0:0.584:0.189,0.395:0.491,0.435,0.075 0|0:0.647:0.309,0.338:0.458,0.438,0.104 0|0:0.699:0.379,0.319:0.422,0.456,0.121 1|1:1.237:0.704,0.533:0.138,0.487,0.375 0|0:0.638:0.252,0.386:0.459,0.444,0.097 0|0:0.527:0.379,0.148:0.529,0.415,0.056 1|0:0.704:0.521,0.184:0.391,0.513,0.096 0|0:0.852:0.423,0.429:0.329,0.489,0.181 0|0:0.622:0.213,0.409:0.465,0.448,0.087 0|0:0.484:0.321,0.164:0.568,0.379,0.052 0|0:0.560:0.361,0.199:0.512,0.417,0.072 0|0:0.614:0.313,0.300:0.480,0.426,0.094 0|0:0.572:0.156,0.416:0.493,0.442,0.065 0|0:0.627:0.300,0.327:0.471,0.431,0.098 0|0:0.755:0.434,0.320:0.384,0.476,0.139 0|1:1.123:0.485,0.637:0.187,0.504,0.309 0|0:0.705:0.354,0.351:0.419,0.456,0.124 1|0:0.936:0.540,0.396:0.278,0.508,0.214 1|0:0.835:0.509,0.326:0.331,0.503,0.166 0|0:0.721:0.414,0.306:0.406,0.467,0.127 0|0:0.799:0.434,0.364:0.360,0.482,0.158 0|1:0.984:0.327,0.658:0.230,0.555,0.215 0|0:0.708:0.297,0.410:0.414,0.464,0.122 0|0:0.472:0.171,0.301:0.580,0.369,0.051 0|0:0.604:0.302,0.303:0.487,0.422,0.091 0|0:0.640:0.215,0.425:0.451,0.457,0.091 0|1:0.853:0.209,0.643:0.282,0.583,0.135 0|0:0.410:0.291,0.119:0.624,0.341,0.035 0|0:0.608:0.434,0.174:0.468,0.457,0.075 0|0:0.897:0.445,0.452:0.304,0.495,0.201 0|0:0.354:0.192,0.163:0.677,0.292,0.031 0|0:0.787:0.443,0.344:0.366,0.482,0.152 0|0:0.617:0.220,0.397:0.470,0.443,0.087 1|0:0.978:0.545,0.433:0.258,0.506,0.236 0|0:0.721:0.313,0.408:0.407,0.465,0.128 1|0:0.931:0.581,0.349:0.272,0.525,0.203 0|0:0.616:0.397,0.219:0.471,0.442,0.087 0|0:0.596:0.254,0.342:0.491,0.422,0.087 0|0:0.661:0.423,0.238:0.440,0.460,0.101 0|0:0.778:0.322,0.456:0.369,0.484,0.147 1|0:0.929:0.639,0.289:0.256,0.559,0.185 0|0:0.717:0.413,0.304:0.409,0.466,0.126 0|0:0.424:0.163,0.262:0.618,0.339,0.043 0|0:0.782:0.421,0.361:0.370,0.478,0.152 0|0:0.593:0.206,0.387:0.486,0.434,0.080 0|0:0.819:0.365,0.454:0.347,0.487,0.166 0|0:0.770:0.462,0.308:0.372,0.485,0.143 0|0:0.847:0.402,0.445:0.332,0.489,0.179 0|1:1.141:0.500,0.641:0.179,0.500,0.320 0|0:0.539:0.319,0.219:0.531,0.399,0.070 0|0:0.530:0.335,0.195:0.536,0.399,0.065 0|1:1.207:0.488,0.719:0.144,0.505,0.351 0|0:0.616:0.346,0.269:0.478,0.429,0.093 0|0:0.619:0.215,0.403:0.468,0.445,0.087 0|0:0.869:0.404,0.465:0.319,0.493,0.188 0|0:0.693:0.350,0.342:0.427,0.453,0.120 0|0:0.702:0.341,0.361:0.421,0.456,0.123 0|0:0.634:0.181,0.453:0.448,0.470,0.082 0|0:0.876:0.377,0.499:0.312,0.500,0.188 0|0:0.664:0.317,0.347:0.446,0.444,0.110 0|0:0.717:0.418,0.299:0.408,0.467,0.125 0|0:0.761:0.458,0.303:0.378,0.484,0.139 0|0:0.606:0.400,0.206:0.477,0.441,0.082 0|0:0.756:0.348,0.408:0.386,0.472,0.142 0|0:0.656:0.363,0.293:0.450,0.443,0.106 0|0:0.411:0.328,0.083:0.616,0.357,0.027 0|0:0.689:0.305,0.384:0.428,0.455,0.117 0|0:0.515:0.354,0.161:0.542,0.401,0.057 0|0:0.335:0.191,0.145:0.692,0.280,0.028 0|0:0.637:0.315,0.322:0.464,0.434,0.102 0|0:0.606:0.315,0.291:0.486,0.423,0.092 0|0:0.654:0.375,0.279:0.451,0.445,0.105 0|0:0.635:0.329,0.307:0.466,0.434,0.101 0|0:0.652:0.318,0.334:0.455,0.439,0.106 0|0:0.840:0.482,0.358:0.333,0.495,0.172 0|0:0.708:0.339,0.369:0.417,0.458,0.125 0|0:0.557:0.189,0.368:0.513,0.418,0.070 0|0:0.697:0.397,0.300:0.422,0.459,0.119 1|0:1.245:0.816,0.429:0.105,0.545,0.350 0|1:0.922:0.235,0.686:0.240,0.599,0.162 1|0:0.883:0.648,0.235:0.270,0.578,0.152 1|0:0.932:0.540,0.392:0.280,0.509,0.212 0|0:0.720:0.341,0.379:0.409,0.462,0.129 0|0:0.336:0.176,0.160:0.692,0.280,0.028 1|1:1.326:0.686,0.641:0.113,0.448,0.439 1|1:1.354:0.634,0.720:0.102,0.441,0.457 0|0:0.557:0.245,0.313:0.519,0.404,0.077 1|0:1.031:0.637,0.394:0.220,0.529,0.251 0|0:0.830:0.354,0.476:0.338,0.493,0.168 0|0:0.467:0.262,0.205:0.587,0.360,0.054 0|0:0.410:0.113,0.297:0.623,0.343,0.034 0|0:0.500:0.237,0.262:0.562,0.375,0.062 0|1:0.967:0.327,0.640:0.242,0.548,0.209 0|0:0.584:0.194,0.390:0.492,0.433,0.076 0|0:0.543:0.392,0.151:0.516,0.425,0.059 0|0:0.735:0.351,0.385:0.400,0.466,0.135 0|0:0.356:0.192,0.165:0.675,0.293,0.032 1|0:0.768:0.546,0.222:0.353,0.526,0.121 0|0:0.619:0.290,0.329:0.477,0.428,0.095 0|0:0.825:0.355,0.470:0.342,0.491,0.167 0|0:0.468:0.181,0.287:0.584,0.364,0.052 0|0:0.761:0.459,0.303:0.377,0.484,0.139 0|0:0.631:0.346,0.285:0.467,0.434,0.099 0|0:0.847:0.402,0.445:0.332,0.489,0.179 0|0:0.640:0.309,0.331:0.462,0.435,0.102 0|0:0.633:0.336,0.298:0.467,0.433,0.100 0|0:0.633:0.336,0.298:0.467,0.433,0.100 0|0:0.562:0.231,0.331:0.515,0.409,0.076 0|0:0.771:0.309,0.462:0.372,0.486,0.143 0|0:0.763:0.397,0.366:0.383,0.472,0.145 1|1:1.534:0.816,0.718:0.052,0.362,0.586 1|1:1.472:0.728,0.744:0.070,0.389,0.542 1|0:0.855:0.728,0.126:0.237,0.671,0.092 0|0:0.501:0.276,0.226:0.561,0.377,0.062 0|0:0.604:0.386,0.219:0.480,0.436,0.084 0|0:0.683:0.327,0.355:0.433,0.450,0.116 0|0:0.755:0.395,0.360:0.387,0.471,0.142 0|0:0.652:0.305,0.347:0.454,0.440,0.106 1|0:0.976:0.676,0.300:0.227,0.570,0.203 0|0:0.744:0.365,0.379:0.394,0.467,0.138 0|1:1.207:0.487,0.720:0.144,0.506,0.351 1|0:0.902:0.519,0.382:0.297,0.505,0.199 0|0:0.806:0.468,0.338:0.352,0.490,0.158 0|0:0.582:0.241,0.341:0.500,0.418,0.082 1|0:0.809:0.656,0.153:0.292,0.608,0.100 0|0:0.801:0.316,0.485:0.352,0.495,0.153 0|0:0.716:0.363,0.353:0.412,0.460,0.128 0|0:0.721:0.278,0.443:0.402,0.475,0.123 0|0:0.804:0.386,0.418:0.357,0.481,0.161 0|0:0.517:0.218,0.299:0.548,0.387,0.065 0|0:0.436:0.258,0.178:0.610,0.344,0.046 0|0:0.763:0.397,0.366:0.383,0.472,0.145 1|0:0.840:0.727,0.113:0.242,0.675,0.082 0|0:0.446:0.301,0.145:0.597,0.359,0.044 0|1:0.859:0.131,0.728:0.236,0.669,0.095 0|0:0.509:0.131,0.378:0.540,0.410,0.049 1|0:0.797:0.689,0.108:0.277,0.649,0.074 0|0:0.458:0.401,0.057:0.565,0.412,0.023 0|0:0.257:0.041,0.215:0.752,0.239,0.009 0|1:0.977:0.300,0.676:0.227,0.570,0.203
zcat: error writing to output: Broken pipe
setwd(dir.GRSoriginal.Anno)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSoriginal inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
sharpS1 <- read.xlsx("apt15826-sup-0001-supinfo.xlsx", sheet = "Table S1", startRow = 3)
sharpS2 <- read.xlsx("apt15826-sup-0001-supinfo.xlsx", sheet = "Table S2", startRow = 3)
sharpS3 <- read.xlsx("apt15826-sup-0001-supinfo.xlsx", sheet = "Table S3", startRow = 3)
sharpS1
sharpS2
sharpS3
rsIDs.GRSorig <- rbind(
c(sharpS1$SNP1, sharpS1$SNP2) %>% as.data.frame() %>% `colnames<-`("GRS_rsID"),
sharpS3 %>% select(SNP) %>% rename(GRS_rsID = SNP) )
rsIDs.GRSorig
https://github.com/sethsh7/hla-prs-toolkit
# Raw link: https://raw.github.com/<username>/<repo>/<branch>/Excelfile.xlsx
#cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised'
#wget https://github.com/sethsh7/hla-prs-toolkit/raw/main/Tools/Snplists/CD_GRS42/CD_GRS42_1000G_nopalin_pos_hg19.xlsx
#wget https://github.com/sethsh7/hla-prs-toolkit/raw/main/Tools/Snplists/CD_GRS42/mapping_1000G.txt
#wget https://github.com/sethsh7/hla-prs-toolkit/raw/main/Tools/Snplists/CD_GRS42/mapping_HRC_TOPMED.txt
#wget https://github.com/sethsh7/hla-prs-toolkit/raw/main/Tools/Snplists/CD_GRS42/scorefile.txt
setwd(dir.GRSrevised.Anno)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
GRS.revised <- read.xlsx("CD_GRS42_1000G_nopalin_pos_hg19.xlsx")
GRS.revised
rsIDs.GRSrevised <- GRS.revised %>%
select(RSID, POSITION_DBSNP151) %>%
rename(GRS_rsID = RSID) %>%
separate(POSITION_DBSNP151, into = c("CHR", "BP"), sep = ":", extra = "merge", remove = TRUE)
rsIDs.GRSrevised
https://genome.ucsc.edu/cgi-bin/hgTables
rsIDlist.GRSorig <- rsIDs.GRSorig %>% select(GRS_rsID)
rsIDlist.GRSrevised <- rsIDs.GRSrevised %>% select(GRS_rsID)
rsIDlist.GRSorig
rsIDlist.GRSrevised
setwd(dir.Data)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
fwrite(rsIDlist.GRSorig, "MEGA_041822_GRSoriginal_rsID_list_v0.1_JRS.txt",
sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
fwrite(rsIDlist.GRSrevised, "MEGA_041822_GRSrevised_rsID_list_v0.1_JRS.txt",
sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE)
#cp -a '/Users/shawjes/Downloads/MEGA_041822_Annotation_GRh37_GRSoriginal_rsID_list_v0.1_JRS.csv' '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSoriginal'
#cp -a '/Users/shawjes/Downloads/MEGA_041822_Annotation_GRh37_GRSrevised_rsID_list_v0.1_JRS.csv' '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised'
https://stackoverflow.com/questions/32004950/how-to-replace-string-in-a-file-in-place-using-sed
#cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSoriginal'
#sed -i '' 's/\#//g' 'MEGA_041822_Annotation_GRh37_GRSoriginal_rsID_list_v0.1_JRS.csv'
#cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised'
#sed -i '' 's/\#//g' 'MEGA_041822_Annotation_GRh37_GRSrevised_rsID_list_v0.1_JRS.csv'
setwd(dir.GRSoriginal.Anno)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSoriginal inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
GRCh37.GRSorig <- fread("MEGA_041822_Annotation_GRh37_GRSoriginal_rsID_list_v0.1_JRS.csv") %>%
rename(GRS_rsID = name, Chr = chrom, BP = chromEnd) %>%
select(GRS_rsID, Chr, BP) %>% # Note: I have previously determined that the coordinates in the MEGA *.bim file match up with the chromEnd column exported from the Table Browser, at least for the SNPs in this GRS.
filter(grepl("_", Chr)==FALSE) %>% # Note: I have previously determined that we do not need to consider the alternative mappings for the HLA region SNPs (e.g., 'chr6_ssto_hap7' or 'chr6_qbl_hap6'), so we will exclude those rows from the dataframe
mutate(Chr = gsub("chr", "", Chr) %>% as.numeric())
Warning in fread("MEGA_041822_Annotation_GRh37_GRSoriginal_rsID_list_v0.1_JRS.csv") :
Previous fread() session was not cleaned up properly. Cleaned up ok at the beginning of this fread() call.
setwd(dir.GRSrevised.Anno)
GRCh37.GRSrevised <- fread("MEGA_041822_Annotation_GRh37_GRSrevised_rsID_list_v0.1_JRS.csv") %>%
rename(GRS_rsID = name, Chr = chrom, BP = chromEnd) %>%
select(GRS_rsID, Chr, BP) %>% # Note: I have previously determined that the coordinates in the MEGA *.bim file match up with the chromEnd column exported from the Table Browser, at least for the SNPs in this GRS.
filter(grepl("_", Chr)==FALSE) %>% # Note: I have previously determined that we do not need to consider the alternative mappings for the HLA region SNPs (e.g., 'chr6_ssto_hap7' or 'chr6_qbl_hap6'), so we will exclude those rows from the dataframe
mutate(Chr = gsub("chr", "", Chr) %>% as.numeric())
GRCh37.GRSorig
GRCh37.GRSrevised
setwd(dir.GRSrevised.Anno)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
anno.revisedGRS <- read.xlsx("CD_GRS42_1000G_nopalin_pos_hg19.xlsx")
anno.revisedGRS %>%
select(RSID, POSITION_DBSNP151) %>%
separate(POSITION_DBSNP151, into = c("Chr", "BP"), sep = ":") %>%
rename(GRS_rsID = RSID) %>%
full_join(GRCh37.GRSrevised, by = "GRS_rsID") %>%
filter(Chr.x != Chr.y | BP.x != BP.y)
# 0 rows = all good.
to_extract <- rbind(GRCh37.GRSorig %>%
mutate(GRS_version = "Original"),
GRCh37.GRSrevised %>%
mutate(GRS_version = "Revised")) %>%
mutate(CHR_BP = paste(Chr, BP, sep = ":")) %>%
arrange(CHR_BP) %>%
mutate(Indicator = 1) %>%
spread(key = GRS_version, value = Indicator) %>%
rename(In_Original_GRS = Original,
In_Revised_GRS = Revised) %>%
mutate(In_Original_GRS = ifelse(is.na(In_Original_GRS), 0, In_Original_GRS),
In_Revised_GRS = ifelse(is.na(In_Revised_GRS), 0, In_Revised_GRS))
to_extract
Follow-up email to Sharp team to confirm that putative genes are assumed to still be the same in the revised GRS (this will impact Neetha’s multi-omics figures)
# FOLLOW-UP
# Use this to determine how far apart the original vs. replacement SNPs are from one another.
to_extract
(View the console to copy the full string)
to_extract$CHR_BP %>%
paste(collapse = ",")
[1] "2:68645560,2:182007800,6:32627713,6:408079,1:200881392,11:118579865,1:172862984,14:69259502,18:12843137,1:172681031,1:172864652,10:81058027,2:61186829,4:123551114,1:192541021,3:159623559,1:192541472,15:75096443,6:29828916,6:32673893,6:138005515,6:159469574,21:43855067,2:204610396,3:188119901,6:32605884,6:33034596,10:6390192,3:159674928,6:33074288,12:111884608,6:32681530,6:31348365,1:2539400,22:21979289,6:128293562,3:119123278,11:128391937,16:10964118,2:191913034,6:33079812,11:111196858,6:32603374,3:46205686,6:32658079,6:32620628,6:32671412,10:6390450,2:103086770"
# https://samtools.github.io/bcftools/bcftools.html
# bcftools -r, --regions chr|chr:pos|chr:from-to|chr:from-[,…​]
cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
bcftools view \
--regions \
2:68645560,2:182007800,6:32627713,6:408079,1:200881392,11:118579865,1:172862984,14:69259502,18:12843137,1:172681031,1:172864652,10:81058027,2:61186829,4:123551114,1:192541021,3:159623559,1:192541472,15:75096443,6:29828916,6:32673893,6:138005515,6:159469574,21:43855067,2:204610396,3:188119901,6:32605884,6:33034596,10:6390192,3:159674928,6:33074288,12:111884608,6:32681530,6:31348365,1:2539400,22:21979289,6:128293562,3:119123278,11:128391937,16:10964118,2:191913034,6:33079812,11:111196858,6:32603374,3:46205686,6:32658079,6:32620628,6:32671412,10:6390450,2:103086770 \
'MEGA_041122_Espinosa08132019QCpass_IMPUTEDwithPhasing_ref1000G3v5_autosomesNoChr21_v0.1_JRS.vcf.gz' \
--output 'MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz'
cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
zcat < 'MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz' | head -n 46
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##filedate=2022.4.12
##contig=<ID=1>
##INFO=<ID=AF,Number=1,Type=Float,Description="Estimated Alternate Allele Frequency">
##INFO=<ID=MAF,Number=1,Type=Float,Description="Estimated Minor Allele Frequency">
##INFO=<ID=R2,Number=1,Type=Float,Description="Estimated Imputation Accuracy (R-square)">
##INFO=<ID=ER2,Number=1,Type=Float,Description="Empirical (Leave-One-Out) R-square (available only for genotyped variants)">
##INFO=<ID=IMPUTED,Number=0,Type=Flag,Description="Marker was imputed but NOT genotyped">
##INFO=<ID=TYPED,Number=0,Type=Flag,Description="Marker was genotyped AND imputed">
##INFO=<ID=TYPED_ONLY,Number=0,Type=Flag,Description="Marker was genotyped but NOT imputed">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DS,Number=1,Type=Float,Description="Estimated Alternate Allele Dosage : [P(0/1)+2*P(1/1)]">
##FORMAT=<ID=HDS,Number=2,Type=Float,Description="Estimated Haploid Alternate Allele Dosage">
##FORMAT=<ID=GP,Number=3,Type=Float,Description="Estimated Posterior Probabilities for Genotypes 0/0, 0/1 and 1/1">
##pipeline=michigan-imputationserver-1.6.6
##imputation=minimac4-1.0.2
##phasing=eagle-2.4
##panel=apps@1000g-phase-3-v5
##r2Filter=0.0
##contig=<ID=2>
##contig=<ID=3>
##contig=<ID=4>
##contig=<ID=5>
##contig=<ID=6>
##contig=<ID=7>
##contig=<ID=8>
##contig=<ID=9>
##contig=<ID=10>
##contig=<ID=11>
##contig=<ID=12>
##contig=<ID=13>
##contig=<ID=14>
##contig=<ID=15>
##contig=<ID=16>
##contig=<ID=17>
##contig=<ID=18>
##contig=<ID=19>
##contig=<ID=20>
##contig=<ID=22>
##bcftools_concatVersion=1.14+htslib-1.14
##bcftools_concatCommand=concat -Oz -o /Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN/MEGA_041122_Espinosa08132019QCpass_IMPUTEDwithPhasing_ref1000G3v5_autosomesNoChr21_v0.1_JRS.vcf.gz chr1.dose.vcf.gz chr2.dose.vcf.gz chr3.dose.vcf.gz chr4.dose.vcf.gz chr5.dose.vcf.gz chr6.dose.vcf.gz chr7.dose.vcf.gz chr8.dose.vcf.gz chr9.dose.vcf.gz chr10.dose.vcf.gz chr11.dose.vcf.gz chr12.dose.vcf.gz chr13.dose.vcf.gz chr14.dose.vcf.gz chr15.dose.vcf.gz chr16.dose.vcf.gz chr17.dose.vcf.gz chr18.dose.vcf.gz chr19.dose.vcf.gz chr20.dose.vcf.gz chr22.dose.vcf.gz; Date=Wed Apr 13 15:23:47 2022
##bcftools_viewVersion=1.14+htslib-1.14
##bcftools_viewCommand=view --regions 2:68645560,2:182007800,6:32627713,6:408079,1:200881392,11:118579865,1:172862984,14:69259502,18:12843137,1:172681031,1:172864652,10:81058027,2:61186829,4:123551114,1:192541021,3:159623559,1:192541472,15:75096443,6:29828916,6:32673893,6:138005515,6:159469574,21:43855067,2:204610396,3:188119901,6:32605884,6:33034596,10:6390192,3:159674928,6:33074288,12:111884608,6:32681530,6:31348365,1:2539400,22:21979289,6:128293562,3:119123278,11:128391937,16:10964118,2:191913034,6:33079812,11:111196858,6:32603374,3:46205686,6:32658079,6:32620628,6:32671412,10:6390450,2:103086770 --output MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz MEGA_041122_Espinosa08132019QCpass_IMPUTEDwithPhasing_ref1000G3v5_autosomesNoChr21_v0.1_JRS.vcf.gz; Date=Fri Apr 22 18:36:43 2022
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1_WG1-DNA_A01_NA19235 2_WG1-DNA_B01_HTP0207A 3_WG1-DNA_C01_HTP0207A 4_WG1-DNA_D01_HTP0222A 5_WG1-DNA_E01_HTP0442A 6_WG1-DNA_F01_HTP0219A 7_WG1-DNA_G01_HTP0465A 8_WG1-DNA_H01_HTP0119A 9_WG1-DNA_B02_HTP0223A 10_WG1-DNA_C02_HTP0471A 11_WG1-DNA_D02_HTP0213A 12_WG1-DNA_E02_HTP0220A 13_WG1-DNA_F02_HTP0224A 14_WG1-DNA_G02_HTP0123A 15_WG1-DNA_H02_HTP0151A 16_WG1-DNA_A03_HTP0215A 17_WG1-DNA_B03_HTP0265A 18_WG1-DNA_C03_HTP0129A 19_WG1-DNA_D03_HTP0198A 20_WG1-DNA_E03_HTP0221A 21_WG1-DNA_F03_HTP0204A 22_WG1-DNA_G03_HTP0217A 23_WG1-DNA_H03_HTP0415A 24_WG1-DNA_A04_HTP0139A 25_WG1-DNA_B04_HTP0218A 26_WG1-DNA_C04_HTP0254A 27_WG1-DNA_D04_HTP0232A 28_WG1-DNA_E04_HTP0238A 29_WG1-DNA_F04_HTP0241A 30_WG1-DNA_G04_HTP0249A 31_WG1-DNA_H04_HTP0258A 32_WG1-DNA_A05_HTP0261A 33_WG1-DNA_B05_HTP0266A 34_WG1-DNA_C05_HTP0226A 35_WG1-DNA_D05_HTP0250A 36_WG1-DNA_E05_HTP0227A 37_WG1-DNA_F05_HTP0233A 38_WG1-DNA_G05_HTP0243A 39_WG1-DNA_H05_HTP0262A 40_WG1-DNA_A06_NA19236 41_WG1-DNA_B06_HTP0267A 42_WG1-DNA_C06_HTP0267A 43_WG1-DNA_D06_HTP0228A 44_WG1-DNA_E06_HTP0251A 45_WG1-DNA_F06_HTP0256A 46_WG1-DNA_G06_HTP0259A 47_WG1-DNA_H06_HTP0234A 48_WG1-DNA_A07_HTP0263A 49_WG1-DNA_B07_HTP0268A 50_WG1-DNA_C07_HTP0229A 51_WG1-DNA_D07_HTP0239A 52_WG1-DNA_E07_HTP0245A 53_WG1-DNA_F07_HTP0253A 54_WG1-DNA_G07_HTP0235A 55_WG1-DNA_H07_HTP0257A 56_WG1-DNA_A08_HTP0260A 57_WG1-DNA_B08_HTP0230A 58_WG1-DNA_C08_HTP0236A 59_WG1-DNA_D08_HTP0240A 60_WG1-DNA_E08_HTP0247A 61_WG1-DNA_F08_HTP0264A 62_WG1-DNA_G08_HTP0269A 63_WG1-DNA_H08_HTP0306A 64_WG1-DNA_A09_HTP0318A 65_WG1-DNA_B09_HTP0084A2 66_WG1-DNA_C09_HTP0270A 67_WG1-DNA_D09_HTP0275A 68_WG1-DNA_E09_HTP0293A 69_WG1-DNA_F09_HTP0300A 70_WG1-DNA_G09_HTP0328A 71_WG1-DNA_H09_HTP0337A 72_WG1-DNA_A10_HTP0310A 73_WG1-DNA_B10_HTP0319A 74_WG1-DNA_C10_HTP0345A 75_WG1-DNA_D10_HTP0272A 76_WG1-DNA_E10_HTP0276A 77_WG1-DNA_F10_HTP0295A 78_WG1-DNA_G10_HTP0301A 79_WG1-DNA_H10_HTP0331A 80_WG1-DNA_A11_HTP0012A2 81_WG1-DNA_B11_HTP0320A 82_WG1-DNA_C11_HTP0303A 83_WG1-DNA_D11_HTP0315A 84_WG1-DNA_E11_HTP0332A 85_WG1-DNA_F11_HTP0273A 86_WG1-DNA_G11_HTP0277A 87_WG1-DNA_H11_HTP0296A 88_WG1-DNA_A12_NA19237 89_WG1-DNA_B12_HTP0334A 90_WG1-DNA_C12_HTP0023A2 91_WG1-DNA_D12_HTP0274A 92_WG1-DNA_E12_HTP0326A 93_WG1-DNA_F12_HTP0335A 94_WG1-DNA_G12_HTP0348A 95_WG1-DNA_H12_HTP0080A2 96_WG2-DNA_B01_HTP0036A2 97_WG2-DNA_C01_HTP0036A2 98_WG2-DNA_D01_HTP0394A 99_WG2-DNA_E01_HTP0350A 100_WG2-DNA_F01_HTP0382A 101_WG2-DNA_G01_HTP0391A 102_WG2-DNA_H01_HTP0398A 103_WG2-DNA_C02_HTP0378A 104_WG2-DNA_D02_HTP0387A 105_WG2-DNA_E02_HTP0395A 106_WG2-DNA_F02_HTP0351A 107_WG2-DNA_G02_HTP0388A 108_WG2-DNA_C03_HTP0384A 109_WG2-DNA_D03_HTP0389A 110_WG2-DNA_E03_HTP0392A 111_WG2-DNA_F03_HTP0052A2 112_WG2-DNA_G03_HTP0373A 113_WG2-DNA_H03_HTP0396A 114_WG2-DNA_B04_HTP0050A2 115_WG2-DNA_C04_HTP0380A 116_WG2-DNA_D04_HTP0385A 117_WG2-DNA_E04_HTP0053A2 118_WG2-DNA_F04_HTP0374A 119_WG2-DNA_G04_HTP0381A 120_WG2-DNA_A05_HTP0393A 121_WG2-DNA_C05_HTP0375A 122_WG2-DNA_D05_HTP0386A 123_WG2-DNA_E05_HTP0397A 124_WG2-DNA_F05_HTP0406A 125_WG2-DNA_G05_HTP0412A 126_WG2-DNA_H05_HTP0422A 127_WG2-DNA_D06_HTP0402A 128_WG2-DNA_E06_HTP0426A 129_WG2-DNA_F06_HTP0435A 130_WG2-DNA_G06_HTP0403A 131_WG2-DNA_H06_HTP0413A 132_WG2-DNA_A07_HTP0419A 133_WG2-DNA_B07_HTP0423A 134_WG2-DNA_C07_HTP0407A 135_WG2-DNA_D07_HTP0410A 136_WG2-DNA_E07_HTP0433A 137_WG2-DNA_F07_HTP0404A 138_WG2-DNA_G07_HTP0408A 139_WG2-DNA_H07_HTP0428A 140_WG2-DNA_A08_HTP0436A 141_WG2-DNA_B08_HTP0414A 142_WG2-DNA_C08_HTP0420A 143_WG2-DNA_D08_HTP0424A 144_WG2-DNA_E08_HTP0411A 145_WG2-DNA_F08_HTP0430A 146_WG2-DNA_G08_HTP0437A 147_WG2-DNA_H08_HTP0405A 148_WG2-DNA_A09_HTP0409A 149_WG2-DNA_B09_HTP0416A 150_WG2-DNA_C09_HTP0421A 151_WG2-DNA_D09_HTP0425A 152_WG2-DNA_E09_HTP0434A 153_WG2-DNA_F09_HTP0418A 154_WG2-DNA_G09_HTP0443A 155_WG2-DNA_H09_HTP0450A 156_WG2-DNA_D10_HTP0469A 157_WG2-DNA_E10_HTP0456A 158_WG2-DNA_F10_HTP0475A 159_WG2-DNA_G10_HTP0480A 160_WG2-DNA_H10_HTP0438A 161_WG2-DNA_A11_HTP0445A 162_WG2-DNA_B11_HTP0451A 163_WG2-DNA_C11_HTP0461A 164_WG2-DNA_D11_HTP0467A 165_WG2-DNA_E11_HTP0476A 166_WG2-DNA_F11_HTP0439A 167_WG2-DNA_G11_HTP0448A 168_WG2-DNA_H11_HTP0458A 169_WG2-DNA_B12_HTP0482A 170_WG2-DNA_C12_HTP0452A 171_WG2-DNA_D12_HTP0462A 172_WG2-DNA_E12_HTP0478A 173_WG2-DNA_F12_HTP0459A 174_WG2-DNA_G12_HTP0468A 175_WG2-DNA_H12_HTP0484A 176_WG3-DNA_A01_NA19235 177_WG3-DNA_B01_HTP0440A 178_WG3-DNA_C01_HTP0440A 179_WG3-DNA_D01_HTP0449A 180_WG3-DNA_E01_HTP0463A 181_WG3-DNA_F01_HTP0479A 182_WG3-DNA_G01_HTP0453A 183_WG3-DNA_H01_HTP0515A 184_WG3-DNA_C02_HTP0519A 185_WG3-DNA_D02_HTP0005A3 186_WG3-DNA_E02_HTP0516A 187_WG3-DNA_F02_HTP0517A 188_WG3-DNA_G02_HTP0329A2 189_WG3-DNA_H02_HTP0015A3 190_WG3-DNA_C03_HTP0333A3 191_WG3-DNA_D03_HTP0112A2 192_WG3-DNA_E03_HTP0512A 193_WG3-DNA_F03_HTP0324A2 194_WG3-DNA_G03_HTP0340A2 195_WG3-DNA_H03_HTP0510A 198_WG3-DNA_F04_HTP0336A 199_WG3-DNA_G04_HTP0203A 200_WG3-DNA_H04_HTP0278A 201_WG3-DNA_A05_HTP0483A 202_WG3-DNA_B05_HTP0498B 203_WG3-DNA_C05_HTP0432A 204_WG3-DNA_D05_HTP0369A 205_WG3-DNA_E05_HTP0561A 206_WG3-DNA_F05_HTP0566A 207_WG3-DNA_G05_HTP0496B 208_WG3-DNA_H05_HTP0497B 209_WG4-DNA_D06_NA19235 210_WG4-DNA_E06_NA19237 211_WG5-DNA_F06_NA19174 212_WG5-DNA_G06_NA19117 214_WG6-DNA_B01_HTP0073A2 215_WG6-DNA_C01_HTP0054A2 216_WG6-DNA_D01_HTP0400A 217_WG6-DNA_E01_HTP0454A 218_WG6-DNA_F01_HTP0298A2 221_WG6-DNA_A02_HTP0431A 222_WG6-DNA_B02_HTP0460A 223_WG6-DNA_C02_HTP0464A 224_WG6-DNA_D02_HTP0399A 225_WG6-DNA_E02_HTP0390A 226_WG6-DNA_F02_HTP0017A4 227_WG6-DNA_G02_HTP0025A3 228_WG6-DNA_H02_HTP0514A 230_WG6-DNA_B03_HTP0066A2 231_WG6-DNA_C03_HTP0518A 232_WG6-DNA_D03_HTP0525A 233_WG6-DNA_E03_HTP0624A 234_WG6-DNA_F03_HTP0103A2 235_WG7-DNA_G03_NA19235 236_WG7-DNA_H03_NA19236 238_WG8-DNA_B04_NA18867 239_WG8-DNA_C04_NA18868 240_WG8-DNA_D04_NA18869 241_WG6-DNA_E04_NA19189 242_WG6-DNA_F04_NA19190 243_WG6-DNA_G04_NA19191 244_WG6-DNA_H04_HTP0431A
2 61186829 2:61186829:A:G A G . PASS AF=0.40611;MAF=0.40611;R2=0.9961;ER2=0.96354;TYPED GT:DS:HDS:GP 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0.001:0.001,0:0.999,0.001,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 1|1:1.999:1,0.999:0,0.001,0.999 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|1:1.998:0.998,1:0,0.002,0.998 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 0|1:1:0,1:0,1,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 1|0:0.972:0.972,0:0.028,0.972,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 0|0:0.009:0.001,0.008:0.991,0.009,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0.012:0,0.012:0.988,0.012,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|1:1:0,1:0,1,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|0:0.012:0.012,0:0.988,0.012,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|1:1.492:0.746,0.746:0.065,0.379,0.556 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 0|1:1:0,1:0,1,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 0|1:1:0,1:0,1,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 0|1:1:0,1:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 0|0:0:0,0:1,0,0 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|1:2:1,1:0,0,1 1|0:1:1,0:0,1,0
zcat: error writing to output: Broken pipe
# Note: Use --const-fid to make output use only the MEGA IID. We will use FamilyID as defined in HTP clinical data instead of the arbitrary FIDs shown in the MEGA data.
cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
plink2 --vcf 'MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz' \
--const-fid \
--make-bed \
--out 'MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS'
plink2 --vcf 'MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz' \
--const-fid \
--export A-transpose \
--out 'MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS'
PLINK v2.00a3 64-bit (11 Oct 2021) www.cog-genomics.org/plink/2.0/
(C) 2005-2021 Shaun Purcell, Christopher Chang GNU General Public License v3
Logging to MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.log.
Options in effect:
--const-fid
--make-bed
--out MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS
--vcf MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz
Start time: Fri Apr 22 18:36:49 2022
16384 MiB RAM detected; reserving 8192 MiB for main workspace.
Using up to 12 threads (change this with --threads).
--vcf: 48 variants scanned.
--vcf: 0k variants converted.
--vcf:
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.pgen +
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.pvar.zst
+ MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.psam
written.
237 samples (0 females, 0 males, 237 ambiguous; 237 founders) loaded from
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.psam.
48 variants loaded from
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.pvar.zst.
Note: No phenotype data present.
Writing MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.fam ...
done.
Writing MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.bim ...
done.
Writing MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.bed ...
0%done.
End time: Fri Apr 22 18:36:49 2022
PLINK v2.00a3 64-bit (11 Oct 2021) www.cog-genomics.org/plink/2.0/
(C) 2005-2021 Shaun Purcell, Christopher Chang GNU General Public License v3
Logging to MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.log.
Options in effect:
--const-fid
--export A-transpose
--out MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS
--vcf MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz
Start time: Fri Apr 22 18:36:49 2022
16384 MiB RAM detected; reserving 8192 MiB for main workspace.
Using up to 12 threads (change this with --threads).
--vcf: 48 variants scanned.
--vcf: 0k variants converted.
--vcf:
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.pgen +
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.pvar.zst
+ MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.psam
written.
237 samples (0 females, 0 males, 237 ambiguous; 237 founders) loaded from
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.psam.
48 variants loaded from
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS-temporary.pvar.zst.
Note: No phenotype data present.
--export A-transpose to
MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.traw ... 0%0%2%4%6%8%10%12%14%16%18%20%22%25%27%29%31%33%35%37%39%41%43%45%47%50%52%54%56%58%60%62%64%66%68%70%72%75%77%79%81%83%85%87%89%91%93%95%97%done.
End time: Fri Apr 22 18:36:49 2022
setwd("/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data")
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
imputed.traw <- fread("MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.traw") %>%
gather(key = "IID", value = "Dosage", contains("DNA")) %>%
filter(grepl("HTP", IID)==TRUE) %>%
select(-c(`(C)M`)) %>%
rename(Dosage.COUNTED = Dosage) %>%
mutate(Extracted = 1) %>%
full_join(to_extract %>% rename(CHR_POS = CHR_BP), by = c("CHR"="Chr", "POS"="BP")) %>%
mutate(Extracted = ifelse(is.na(Extracted), 0, Extracted)) %>%
filter(In_Original_GRS == 1 | In_Revised_GRS == 1)
imputed.traw
inventory <- imputed.traw %>%
select(-c(IID, Dosage.COUNTED)) %>%
unique() %>%
select(GRS_rsID, CHR, POS, CHR_POS, In_Original_GRS, In_Revised_GRS, everything())
inventory
inventory %>% filter(Extracted == 0)
inventory %>%
filter(In_Original_GRS == 1) %>%
group_by(CHR, In_Original_GRS, Extracted) %>%
summarise(N = n())
`summarise()` has grouped output by 'CHR', 'In_Original_GRS'. You can override using the `.groups` argument.
inventory %>%
filter(In_Revised_GRS == 1) %>%
group_by(CHR, In_Revised_GRS, Extracted) %>%
summarise(N = n())
`summarise()` has grouped output by 'CHR', 'In_Revised_GRS'. You can override using the `.groups` argument.
print("Perfect, we got all of the SNPs except for the one located on Chromosome 21, which we purposely excluded from imputation and will deal with later.")
[1] "Perfect, we got all of the SNPs except for the one located on Chromosome 21, which we purposely excluded from imputation and will deal with later."
imputed.traw01 %>%
select(-c(IID, Dosage.COUNTED)) %>%
unique() %>%
mutate(CHR_POS = paste(CHR, POS, sep = ":")) %>%
select(CHR_POS, VCF_VariantID) %>%
unique() %>%
group_by(CHR_POS) %>%
summarise(N_SNPs_at_ChrBP = n()) %>%
arrange(desc(N_SNPs_at_ChrBP))
# Print chromosome, position, ref allele and the first alternate allele
#cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
#bcftools query -f '%CHROM %POS %REF %ALT{0} %INFO\n' 'MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.vcf.gz' > 'MEGA_041822_QualityINFO_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.txt'
#cat 'MEGA_041822_QualityINFO_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.txt'
setwd(dir.Data)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
extracted.INFO <- fread("MEGA_041822_QualityINFO_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.txt") %>%
`colnames<-`(c("CHR", "POS", "REF", "ALT", "INFO")) %>%
separate(INFO, into = c("AF", "MAF", "R2", "temp1", "temp2"), sep = ";", extra = "merge", remove = FALSE) %>%
mutate(ER2 = ifelse(grepl("ER2", temp1)==TRUE, temp1, NA),
Type = ifelse(grepl("ER2", temp1)==FALSE, temp1, temp2)) %>%
select(-c(temp1, temp2)) %>%
mutate(AF = gsub("AF=", "", AF),
MAF = gsub("MAF=", "", MAF),
R2 = gsub("R2=", "", R2),
ER2 = gsub("ER2=", "", ER2))
Warning: Expected 5 pieces. Missing pieces filled with `NA` in 32 rows [2, 3, 5, 7, 10, 12, 13, 14, 16, 21, 22, 23, 24, 25, 26, 27, 29, 30, 32, 33, ...].
extracted.INFO
imputationQCinfo <- extracted.INFO %>%
full_join(inventory, by = intersect(colnames(extracted.INFO), colnames(inventory)))
#imputationQCinfo
imputationQCinfo.GRSorig <- imputationQCinfo %>%
rename(VCF_VariantID = SNP) %>%
filter(In_Original_GRS == 1) %>%
select(-c(In_Original_GRS, In_Revised_GRS)) %>%
mutate(GRS_version = "Sharp et al., 2019") %>%
select(GRS_version, GRS_rsID, VCF_VariantID, CHR, POS, REF, ALT, COUNTED, everything()) %>%
select(-c(CHR_POS)) %>%
rename(COUNTED_in_traw = COUNTED) %>%
mutate(Build = "GRCh37")
imputationQCinfo.GRSrevised <- imputationQCinfo %>%
rename(VCF_VariantID = SNP) %>%
filter(In_Revised_GRS == 1) %>%
select(-c(In_Original_GRS, In_Revised_GRS)) %>%
mutate(GRS_version = "Sharp et al., 2022") %>%
select(GRS_version, GRS_rsID, VCF_VariantID, CHR, POS, REF, ALT, COUNTED, everything()) %>%
select(-c(CHR_POS)) %>%
rename(COUNTED_in_traw = COUNTED) %>%
mutate(Build = "GRCh37")
imputationQCinfo.GRSorig
imputationQCinfo.GRSrevised
setwd(dir.GRSoriginal.Anno)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSoriginal inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
fwrite(imputationQCinfo.GRSorig, "MEGA_041822_CD_GRS_Sharp2019_ImputationQCinfo_GRCh37_v0.1_JRS.csv")
setwd(dir.GRSrevised.Anno)
fwrite(imputationQCinfo.GRSrevised, "MEGA_041822_CD_GRS_Sharp2022_ImputationQCinfo_GRCh37_v0.1_JRS.csv")
imputed.traw.GRSorig <- imputationQCinfo.GRSorig %>%
rename(COUNTED = COUNTED_in_traw) %>%
left_join(imputed.traw01,
by = intersect(colnames(imputationQCinfo.GRSorig %>% rename(COUNTED = COUNTED_in_traw)),
colnames(imputed.traw01))) %>%
select(-c(Extracted, In_Original_GRS, In_Revised_GRS)) %>%
select(GRS_version,
GRS_rsID,
VCF_VariantID, IID, COUNTED, Dosage.COUNTED,
CHR, POS, REF, ALT, INFO, AF, MAF, R2, ER2, Type,
everything()) %>%
mutate(temp = gsub("_WG", "_|WG", IID)) %>%
separate(temp, into = c("rm", "MEGA.IID"), sep = "[|]", extra = "merge", remove = FALSE) %>%
select(-c(IID, temp, rm)) %>%
select(GRS_version, MEGA.IID, GRS_rsID, VCF_VariantID,
COUNTED, Dosage.COUNTED, everything()) %>%
unique()
imputed.traw.GRSrevised <- imputationQCinfo.GRSrevised %>%
rename(COUNTED = COUNTED_in_traw) %>%
left_join(imputed.traw01,
by = intersect(colnames(imputationQCinfo.GRSrevised %>% rename(COUNTED = COUNTED_in_traw)),
colnames(imputed.traw01))) %>%
select(-c(Extracted, In_Original_GRS, In_Revised_GRS)) %>%
select(GRS_version,
GRS_rsID,
VCF_VariantID, IID, COUNTED, Dosage.COUNTED,
CHR, POS, REF, ALT, INFO, AF, MAF, R2, ER2, Type,
everything()) %>%
mutate(temp = gsub("_WG", "_|WG", IID)) %>%
separate(temp, into = c("rm", "MEGA.IID"), sep = "[|]", extra = "merge", remove = FALSE) %>%
select(-c(IID, temp, rm)) %>%
select(GRS_version, MEGA.IID, GRS_rsID, VCF_VariantID,
COUNTED, Dosage.COUNTED, everything()) %>%
unique()
imputed.traw.GRSorig
imputed.traw.GRSrevised
setwd(dir.Data)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
MEGA.IDkey <- fread("MEGA_041822_MEGA2_to_HTP_ID_key_v0.1_JRS.csv") %>%
filter(HTP_participant == "Yes") %>%
select(FamilyID, RecordID, MEGA.FID, MEGA.IID, MEGA.LabID) %>%
unique()
imputed.traw.GRSorig01 <- imputed.traw.GRSorig %>%
full_join(MEGA.IDkey, by = "MEGA.IID") %>%
select(FamilyID, RecordID, MEGA.FID, MEGA.IID, MEGA.LabID, GRS_rsID, COUNTED, Dosage.COUNTED, everything()) %>%
unique()
imputed.traw.GRSrevised01 <- imputed.traw.GRSrevised %>%
full_join(MEGA.IDkey, by = "MEGA.IID") %>%
select(FamilyID, RecordID, MEGA.FID, MEGA.IID, MEGA.LabID, GRS_rsID, COUNTED, Dosage.COUNTED, everything()) %>%
unique()
imputed.traw.GRSorig01
imputed.traw.GRSrevised01
deltaChrPos_replacedSNPs <- anno.revisedGRS %>%
filter(!is.na(REPLACING)) %>%
filter(COMPONENT == "Non-HLA") %>%
rename(GRS_Component = COMPONENT,
Score_Allele.revised = SCORE_ALLELE,
`Score_Weight.revised` = SCORE,
Strand.revised = STRAND) %>%
rename(GRS_rsID.revised = RSID,
GRS_rsID.original = REPLACING) %>%
select(-c(`Freq_UKB(A1)`, `Freq_UKB(A2)`, `1000G?`,
A1, A2)) %>%
select(GRS_Component, GRS_rsID.revised, GRS_rsID.original, Score_Allele.revised, Score_Weight.revised, POSITION_DBSNP151) %>%
separate(POSITION_DBSNP151, into = c("Chr_rsID.revised", "Pos_rsID.revised"), sep = ":", extra = "merge", remove = TRUE) %>%
left_join(GRCh37.GRSorig %>%
rename(GRS_rsID.original = GRS_rsID,
Chr_rsID.original = Chr,
Pos_rsID.original = BP),
by = "GRS_rsID.original") %>%
left_join(sharpS3 %>%
select(Putative.Gene, SNP, Allele, `Weight.(β)`) %>%
rename(Putative_Gene.original = Putative.Gene, GRS_rsID.original = SNP, Score_Allele.original = Allele, Score_Weight.original = `Weight.(β)`),
by = "GRS_rsID.original") %>%
select(GRS_Component,
GRS_rsID.original, Score_Allele.original, Score_Weight.original, Chr_rsID.original, Pos_rsID.original,
GRS_rsID.revised, Score_Allele.revised, Score_Weight.revised, Chr_rsID.revised, Pos_rsID.revised,
everything()) %>%
mutate(Chr_rsID.original = as.numeric(Chr_rsID.original),
Chr_rsID.revised = as.numeric(Chr_rsID.revised),
Pos_rsID.original = as.numeric(Pos_rsID.original),
Pos_rsID.revised = as.numeric(Pos_rsID.revised)) %>%
mutate(delta.Chr = Chr_rsID.revised - Chr_rsID.original,
delta.Pos = Pos_rsID.revised - Pos_rsID.original) %>%
select(GRS_rsID.original, GRS_rsID.revised, delta.Chr, delta.Pos, everything()) %>%
select(GRS_rsID.original, GRS_rsID.revised, Chr_rsID.original, Pos_rsID.original, Chr_rsID.revised, Pos_rsID.revised, delta.Chr, delta.Pos)
deltaChrPos_replacedSNPs
setwd(dir.GRSrevised.Anno)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
fwrite(deltaChrPos_replacedSNPs, "MEGA_041822_DeltaChrPos_GRS_rsID_original_vs_replacement_v0.1_JRS.csv")
#cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
#wget 'https://analysistools.cancer.gov/LDlink/tmp/LDpair_0418388542.txt'
#mv 'LDpair_0418388542.txt' 'LDpair_rs1359062_rs1323292.txt'
#cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
#wget 'https://analysistools.cancer.gov/LDlink/tmp/LDpair_0438167674.txt'
#mv 'LDpair_0438167674.txt' 'LDpair_rs12142280_rs11801183.txt'
# rs2387397 rs947474
#cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
#wget 'https://analysistools.cancer.gov/LDlink/tmp/LDpair_0439208571.txt'
#mv 'LDpair_0439208571.txt' 'LDpair_rs2387397_rs947474.txt'
# "LDpair_rs12142280_rs11801183.txt" "LDpair_rs1359062_rs1323292.txt" "LDpair_rs2387397_rs947474.txt"
setwd(dir.Data)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
data.LDpairs <- list.files(pattern = "LDpair") %>%
lapply(fread, fill = TRUE)
data.LDpairs
[[1]]
[[2]]
[[3]]
temp <- list()
for ( i in 1:length(data.LDpairs) ){
temp[[i]] <- data.LDpairs[[i]] %>% select(V1, V2) %>% filter(V1 == "D':" | V1 == "R2:" | V1 == "Chi-sq:" | V1 == "p-value:") %>%
mutate(dummy = 1,
V1 = gsub(":", "", V1),
Population = data.LDpairs[[i]]$V1[5]) %>%
spread(key = V1, value = V2) %>%
select(-dummy) %>%
cbind(.,
data.LDpairs[[i]] %>%
filter(grepl("rs", V1)==TRUE) %>%
select(V1) %>%
unique() %>%
filter(grepl("[(]", V1)==FALSE) %>%
t() %>%
as.data.frame() %>%
rename(SNP1 = V1, SNP2 = V2)) %>%
# cbind(.,
# data.LDpairs[[i]] %>%
# filter(grepl("_", V1)==TRUE) %>%
# select(V1, V3) %>%
# unique() %>%
# rename(SNP1allele_SNP2allele = V1,
# Correlation = V3)) %>%
#mutate(SNP1allele_SNP2allele = gsub("[:]", "", V1)) %>%
#separate(SNP1allele_SNP2allele, into = c("SNP1_Allele", "SNP2_Allele"), sep = "_", extra = "merge", remove = TRUE) %>%
select(Population, SNP1, SNP2,
`D'`, R2, `Chi-sq`, `p-value`,
#SNP1_Allele, SNP2_Allele,
everything()) %>%
rename(GRS_rsID.original = SNP1,
GRS_rsID.revised = SNP2)
#mutate(Correlation = gsub("[(]", "", Correlation),
# Correlation = gsub("[)]", "", Correlation))
}
LDstats <- temp %>% rbindlist()
LDstats
setwd(dir.GRSrevised.Anno)
fwrite(LDstats, "MEGA_041822_LD_GRS_rsID_original_vs_replacement_v0.1_JRS.csv")
#setwd(dir.GRSrevised.Anno)
#LDstats <- fread("MEGA_041822_LD_GRS_rsID_original_vs_replacement_v0.1_JRS.csv")
#LDstats
imputed.traw.GRSorig01 %>%
select(MEGA.IID) %>%
unique() %>%
nrow()
[1] 222
imputed.traw.GRSrevised01 %>%
select(MEGA.IID) %>%
unique() %>%
nrow()
[1] 222
GRSrevised_map_PutativeGenes2019
setwd(dir.GRSrevised.Anno)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/GRSrevised inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
fwrite(GRSrevised_map_PutativeGenes2019, "MEGA_041822_GRS2022COMPONENT_to_GRS2019PutativeGene_GRS2019nonHLADQSNPsOnly_v0.1_JRS.csv")
setwd(dir.GRSoriginal.Anno)
fwrite(GRSrevised_map_PutativeGenes2019, "MEGA_041822_GRS2022COMPONENT_to_GRS2019PutativeGene_GRS2019nonHLADQSNPsOnly_v0.1_JRS.csv")
nonHLADQ_SNPs.GRSrevised <- GRSrevised_map_PutativeGenes2019 %>%
filter(`GRS Component (Sharp 2019)` == "Non-HLA-DQ") %>%
select(`Variant (Sharp 2022)`) %>%
unique()
nonHLADQ_SNPs.GRSorig
nonHLADQ_SNPs.GRSrevised
imputed.traw.GRSorig02 <- imputed.traw.GRSorig01 %>%
filter(GRS_rsID %in% nonHLADQ_SNPs.GRSorig$`Variant (Sharp 2019)`)
imputed.traw.GRSrevised02 <- imputed.traw.GRSrevised01 %>%
filter(GRS_rsID %in% nonHLADQ_SNPs.GRSrevised$`Variant (Sharp 2022)`)
imputed.traw.GRSorig02$GRS_rsID %>% unique() %>% length()
[1] 38
imputed.traw.GRSrevised02$GRS_rsID %>% unique() %>% length()
[1] 38
#[1] 38
#[1] 38
imputed.traw.GRSorig02
imputed.traw.GRSrevised02
print("We have now prepared one dataframe of non-HLA-DQ SNP dosage data for the rsIDs used in the GRS published in 2019, and one dataframe of non-HLA-DQ SNP dosage data for the rsIDs used in the revised GRS shared on github in 2022. In both cases the GRS involves a total of 38 non-HLA-DQ SNPs.")
[1] "We have now prepared one dataframe of non-HLA-DQ SNP dosage data for the rsIDs used in the GRS published in 2019, and one dataframe of non-HLA-DQ SNP dosage data for the rsIDs used in the revised GRS shared on github in 2022. In both cases the GRS involves a total of 38 non-HLA-DQ SNPs."
imputed.traw.GRSorig02 %>%
select(MEGA.IID) %>%
unique() %>%
nrow()
[1] 222
imputed.traw.GRSrevised02 %>%
select(MEGA.IID) %>%
unique() %>%
nrow()
[1] 222
print("Non-HLA-DQ SNP dosage data for the 2019 GRS includes 222 IIDs.")
[1] "Non-HLA-DQ SNP dosage data for the 2019 GRS includes 222 IIDs."
print("Non-HLA-DQ SNP dosage data for the 2022 GRS includes 222 IIDs.")
[1] "Non-HLA-DQ SNP dosage data for the 2022 GRS includes 222 IIDs."
imputed.traw.GRSorig02
imputed.traw.GRSrevised02
imputed.traw.GRSorig03
imputed.traw.GRSrevised03
setwd(dir.Data)
Warning: The working directory was changed to /Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
fwrite(imputed.traw.GRSorig03, "MEGA_041822_Dosage_GRS2019_ImputedRef1000G3v5_excludingChr21snp_0.1_JRS.csv")
fwrite(imputed.traw.GRSorig03, "MEGA_041822_Dosage_GRS2019_ImputedRef1000G3v5_excludingChr21snp_0.1_JRS.tsv", sep = "\t")
setwd(dir.Data)
fwrite(imputed.traw.GRSrevised03, "MEGA_041822_Dosage_GRS2022_ImputedRef1000G3v5_excludingChr21snp_0.1_JRS.csv")
fwrite(imputed.traw.GRSrevised03, "MEGA_041822_Dosage_GRS2022_ImputedRef1000G3v5_excludingChr21snp_0.1_JRS.tsv")
cd '/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results/Results'
cat *.info.gz >> '/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN/autosomesNoChr21.info.gz'
names(snp_list.split) #[1] "1" "2" "3" "4" "6" "10" "11" "12" "14" "15" "16" "18" "22"
length(snp_list.split) #[1] 13
imputation.info.GRS.chr1 <- imputation.info$`1` %>% filter(SNP %in% snp_list.split$`1`$SNP)
imputation.info.GRS.chr2 <- imputation.info$`2` %>% filter(SNP %in% snp_list.split$`2`$SNP)
imputation.info.GRS.chr3 <- imputation.info$`3` %>% filter(SNP %in% snp_list.split$`3`$SNP)
imputation.info.GRS.chr4 <- imputation.info$`4` %>% filter(SNP %in% snp_list.split$`4`$SNP)
imputation.info.GRS.chr6 <- imputation.info$`6` %>% filter(SNP %in% snp_list.split$`6`$SNP)
imputation.info.GRS.chr10 <- imputation.info$`10` %>% filter(SNP %in% snp_list.split$`10`$SNP)
imputation.info.GRS.chr11 <- imputation.info$`11` %>% filter(SNP %in% snp_list.split$`11`$SNP)
imputation.info.GRS.chr12 <- imputation.info$`12` %>% filter(SNP %in% snp_list.split$`12`$SNP)
imputation.info.GRS.chr14 <- imputation.info$`14` %>% filter(SNP %in% snp_list.split$`14`$SNP)
imputation.info.GRS.chr15 <- imputation.info$`15` %>% filter(SNP %in% snp_list.split$`15`$SNP)
imputation.info.GRS.chr16 <- imputation.info$`16` %>% filter(SNP %in% snp_list.split$`16`$SNP)
imputation.info.GRS.chr18 <- imputation.info$`18` %>% filter(SNP %in% snp_list.split$`18`$SNP)
imputation.info.GRS.chr22 <- imputation.info$`22` %>% filter(SNP %in% snp_list.split$`22`$SNP)
imputation.info.GRS <- list(imputation.info.GRS.chr1,
imputation.info.GRS.chr2,
imputation.info.GRS.chr3,
imputation.info.GRS.chr4,
imputation.info.GRS.chr6,
imputation.info.GRS.chr10,
imputation.info.GRS.chr11,
imputation.info.GRS.chr12,
imputation.info.GRS.chr14,
imputation.info.GRS.chr15,
imputation.info.GRS.chr16,
imputation.info.GRS.chr18,
imputation.info.GRS.chr22) %>%
lapply(as.data.frame) %>%
rbindlist() %>%
arrange(Rsq) %>%
as.data.frame()
imputation.info.GRS %>% dim() #[1] 43 13
imputation.info.GRS
setwd(dir.Data)
fwrite(imputation.info.GRS, "MEGA_041822_Extracted_from_ImputedRef1000G3v5_NoChr21_v0.1_JRS.info.gz")
imputed.traw02 <- imputed.traw01 %>%
left_join(imputationQCinfo.GRSorig, by = "CHR_POS") %>%
rename(`INFO.REF(0)` = `REF(0)`,
`INFO.ALT(1)` = `ALT(1)`,
INFO.ALT_Frq = ALT_Frq,
INFO.MAF = MAF,
INFO.AvgCall = AvgCall,
INFO.Rsq = Rsq,
INFO.Genotyped = Genotyped,
INFO.LooRsq = LooRsq,
INFO.EmpR = EmpR,
INFO.EmpRsq = EmpRsq,
INFO.Dose0 = Dose0,
INFO.Dose1 = Dose1)
imputed.traw02
originalGRS.nonHLADQ.dosage.INFO <- imputed.traw02 %>% filter(In_Original_GRS == 1) %>%
select(-c(In_Original_GRS, In_Revised_GRS, Extracted)) %>%
select(IID, CHR, POS, SNP, COUNTED, ALT, Dosage.COUNTED, everything()) %>%
arrange(INFO.Rsq)
revisedGRS.dosage.INFO <- imputed.traw02 %>% filter(In_Revised_GRS == 1) %>%
select(-c(In_Original_GRS, In_Revised_GRS, Extracted)) %>%
select(IID, CHR, POS, SNP, COUNTED, ALT, Dosage.COUNTED, everything()) %>%
arrange(INFO.Rsq)
originalGRS.nonHLADQ.INFO <- originalGRS.nonHLADQ.dosage.INFO %>% select(-c(IID, Dosage.COUNTED)) %>% unique()
revisedGRS.INFO <- revisedGRS.dosage.INFO %>% select(-c(IID, Dosage.COUNTED)) %>% unique()
originalGRS.nonHLADQ.dosage.INFO
revisedGRS.dosage.INFO
originalGRS.nonHLADQ.INFO
revisedGRS.INFO
INFO.nonHLADQ.origGRS <- anno.nonHLADQ.origGRS %>%
filter(grepl("_", chrom)==FALSE) %>%
select(name, chrom, chromEnd) %>%
rename(GRS_rsID = name, CHR = chrom, POS = chromEnd) %>%
as.data.frame() %>%
mutate(CHR = as.character(CHR),
CHR = gsub("chr", "", CHR),
POS = as.character(POS)) %>%
full_join(originalGRS.nonHLADQ.INFO %>%
mutate(CHR = as.character(CHR), POS = as.character(POS)),
by = c("CHR", "POS"))
INFO.nonHLADQ.origGRS
INFO.revisedGRS <- anno.revisedGRS %>%
separate(POSITION_DBSNP151, into = c("CHR", "POS"), sep = ":", extra = "merge", remove = FALSE) %>%
select(RSID, CHR, POS) %>%
unique() %>%
rename(GRS_rsID = RSID) %>%
as.data.frame() %>%
mutate(CHR = as.character(CHR),
POS = as.character(POS)) %>%
full_join(revisedGRS.INFO %>%
mutate(CHR = as.character(CHR), POS = as.character(POS)),
by = c("CHR", "POS"))
INFO.revisedGRS
INFO.nonHLADQ.origGRS
INFO.revisedGRS
setwd(dir.Data)
fwrite(INFO.nonHLADQ.origGRS, "MEGA_041812_ImputationINFO_OriginalGRS_nonHLADQsnps_v0.1_JRS.csv")
fwrite(INFO.revisedGRS, "MEGA_041812_ImputationINFO_RevisedGRS_v0.1_JRS.csv")
theme_set(theme_gray(base_size = 12, base_family = "Arial") +
theme(panel.border = element_rect(colour="black", fill = "transparent"),
plot.title = element_text(face="bold", hjust = 0), # lineheight=.8, size=20,
axis.text = element_text(color="black", size = 11), #size = 14
axis.text.x = element_text(angle = 0, hjust = NULL),
strip.background = element_rect(colour="black", fill = "light grey", size = 1), # adjusts facet label borders (if any)
panel.background = element_blank(),
panel.grid = element_blank()
))
RedBlue <- c("#CD3333", "#1874CD")
GrayBlue <- c("grey", "#2b8cbe")
plotData.INFO.nonHLADQ.origGRS <- INFO.nonHLADQ.origGRS %>%
select(GRS_rsID, SNP, INFO.Rsq, INFO.EmpRsq, INFO.MAF) %>%
unique() %>%
mutate(plotName = paste(GRS_rsID, " (", SNP, ")", sep = "")) %>%
select(plotName, everything()) %>%
mutate(INFO.Rsq = as.numeric(INFO.Rsq)) %>%
arrange(INFO.Rsq) %>%
# Filter out the GRS SNP located on Chromosome 21:
filter(GRS_rsID != "rs1893592")
plotData.INFO.revisedGRS <- INFO.revisedGRS %>%
select(GRS_rsID, SNP, INFO.Rsq, INFO.EmpRsq, INFO.MAF) %>%
unique() %>%
mutate(plotName = paste(GRS_rsID, " (", SNP, ")", sep = "")) %>%
select(plotName, everything()) %>%
mutate(INFO.Rsq = as.numeric(INFO.Rsq)) %>%
arrange(INFO.Rsq) %>%
# Filter out the GRS SNP located on Chromosome 21:
filter(GRS_rsID != "rs1893592")
plotData.INFO.nonHLADQ.origGRS
plotData.INFO.revisedGRS
plot.nonHLADQ.origGRS.Rsq <- plotData.INFO.nonHLADQ.origGRS %>%
ggplot(aes(x = reorder(plotName, INFO.Rsq), y = INFO.Rsq)) +
geom_bar(stat = "identity",
#color = GrayBlue[[1]],
fill = GrayBlue[[1]]) +
coord_flip() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "dodgerblue3") +
labs(title = "Estimated correlation of imputed genotypes and true, unobserved genotypes",
subtitle = "Original GRS (2019), non-HLA-DQ SNPs only",
caption = 'Rsq: "The estimated value of the squared correlation between imputed genotypes and true, unobserved genotypes"\n(https://genome.sph.umich.edu/wiki/Minimac3_Info_File).') +
ylab("Rsq") +
xlab("") +
theme(plot.caption = element_text(hjust = 0),
aspect.ratio = 0.9)
plot.revisedGRS.Rsq <- plotData.INFO.revisedGRS %>%
ggplot(aes(x = reorder(plotName, INFO.Rsq), y = INFO.Rsq)) +
geom_bar(stat = "identity",
#color = GrayBlue[[1]],
fill = GrayBlue[[1]]) +
coord_flip() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "dodgerblue3") +
labs(title = "Estimated correlation of imputed genotypes and true, unobserved genotypes",
subtitle = "Revised GRS (2022)",
caption = 'Rsq: "The estimated value of the squared correlation between imputed genotypes and true, unobserved genotypes"\n(https://genome.sph.umich.edu/wiki/Minimac3_Info_File).') +
ylab("Rsq") +
xlab("") +
theme(plot.caption = element_text(hjust = 0),
aspect.ratio = 0.9)
plot.nonHLADQ.origGRS.Rsq
plot.revisedGRS.Rsq
filename <- "MEGA_041822_Plot_Imputation_Rsq_OrigGRS_nonHLADQ_v0.1_JRS"
setwd(dir.Plots)
ggsave(plot = plot.nonHLADQ.origGRS.Rsq,
filename = paste(filename, ".png", sep = ""), width = 10, height = 10, units = "in")
setwd(dir.Plots)
ggsave(plot = plot.nonHLADQ.origGRS.Rsq,
filename = paste(filename, ".pdf", sep = ""), device = cairo_pdf, width = 10, height = 10, units = "in")
filename <- "MEGA_041822_Plot_Imputation_Rsq_RevisedGRS_v0.1_JRS"
setwd(dir.Plots)
ggsave(plot = plot.revisedGRS.RSQ,
filename = paste(filename, ".png", sep = ""), width = 10, height = 10, units = "in")
setwd(dir.Plots)
ggsave(plot = plot.revisedGRS.RSQ,
filename = paste(filename, ".pdf", sep = ""), device = cairo_pdf, width = 10, height = 10, units = "in")
plotData.INFO.nonHLADQ.origGRS %>% nrow()
plotData.INFO.nonHLADQ.origGRS %>% filter(INFO.EmpRsq != "-") %>% nrow()
plot.nonHLADQ.origGRS.EmpRsq <- plotData.INFO.nonHLADQ.origGRS %>%
filter(INFO.EmpRsq != "-") %>%
mutate(INFO.EmpRsq = as.numeric(INFO.EmpRsq)) %>%
ggplot(aes(x = reorder(plotName, INFO.EmpRsq), y = INFO.EmpRsq)) +
geom_bar(stat = "identity",
#color = GrayBlue[[1]],
fill = GrayBlue[[1]]) +
coord_flip() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "dodgerblue3") +
labs(title = "Estimated correlation of imputed genotypes and true, observed genotypes",
subtitle = "Original GRS (2019), observed non-HLA-DQ SNPs only",
caption = 'EmpRsq: The estimated squared correlation "between the true genotyped values and the imputed dosages that were calculated by hiding all known genotyped for the given SNP"\n(https://genome.sph.umich.edu/wiki/Minimac3_Info_File).') +
ylab("EmpRsq") +
xlab("") +
theme(plot.caption = element_text(hjust = 0),
aspect.ratio = 0.9)
plot.revisedGRS.EmpRsq <- plotData.INFO.revisedGRS %>%
filter(INFO.EmpRsq != "-") %>%
mutate(INFO.EmpRsq = as.numeric(INFO.EmpRsq)) %>%
ggplot(aes(x = reorder(plotName, INFO.EmpRsq), y = INFO.EmpRsq)) +
geom_bar(stat = "identity",
#color = GrayBlue[[1]],
fill = GrayBlue[[1]]) +
coord_flip() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "dodgerblue3") +
labs(title = "Estimated correlation of imputed genotypes and true, observed genotypes",
subtitle = "Revised GRS (2022), observed SNPs only",
caption = 'EmpRsq: The estimated squared correlation "between the true genotyped values and the imputed dosages that were calculated by hiding all known genotyped for the given SNP"\n(https://genome.sph.umich.edu/wiki/Minimac3_Info_File).') +
ylab("EmpRsq") +
xlab("") +
theme(plot.caption = element_text(hjust = 0),
aspect.ratio = 0.9)
plot.nonHLADQ.origGRS.EmpRsq
plot.revisedGRS.EmpRsq
filename <- "MEGA_041422_Plot_Imputation_EmpRsq_OrigGRS_nonHLADQ_v0.1_JRS"
setwd(dir.Plots)
ggsave(plot = plot.nonHLADQ.origGRS.EmpRsq,
filename = paste(filename, ".png", sep = ""), width = 10, height = 10, units = "in")
setwd(dir.Plots)
ggsave(plot = plot.nonHLADQ.origGRS.EmpRsq,
filename = paste(filename, ".pdf", sep = ""), device = cairo_pdf, width = 10, height = 10, units = "in")
filename <- "MEGA_041422_Plot_Imputation_EmpRsq_RevisedGRS_v0.1_JRS"
setwd(dir.Plots)
ggsave(plot = plot.revisedGRS.EmpRsq,
filename = paste(filename, ".png", sep = ""), width = 10, height = 10, units = "in")
setwd(dir.Plots)
ggsave(plot = plot.revisedGRS.EmpRsq,
filename = paste(filename, ".pdf", sep = ""), device = cairo_pdf, width = 10, height = 10, units = "in")
grs_rsIDs.orig <- rbind(sharpS1 %>%
select(`HLA-DQ.haplotype`, SNP1, SNP2) %>%
rename(HLA_DQ_haplotype = `HLA-DQ.haplotype`) %>%
gather(key = "key", value = "value", SNP1:SNP2) %>%
mutate(SNP_index = gsub("SNP", "", key) %>% as.numeric()) %>%
rename(GRS_rsID = value) %>%
select(HLA_DQ_haplotype, SNP_index, GRS_rsID) %>%
arrange(HLA_DQ_haplotype) %>%
select(GRS_rsID),
anno.nonHLADQ.origGRS %>% select(name) %>% rename(GRS_rsID = name) %>% as.data.frame()) %>%
unique()
grs_rsIDs.revised <- anno.revisedGRS %>%
select(RSID) %>%
rename(GRS_rsID = RSID) %>%
unique()
grs_rsIDs.orig_revised <- rbind(grs_rsIDs.orig, grs_rsIDs.revised) %>%
unique()
grs_rsIDs.orig
grs_rsIDs.revised
grs_rsIDs.orig_revised
setwd(dir.Data)
fwrite(grs_rsIDs.orig_revised, "MEGA_041822_rsIDs_GRSoriginal_GRSrevised_v0.1_JRS.txt", sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
fwrite(grs_rsIDs.orig, "MEGA_041822_rsIDs_GRSoriginal_v0.1_JRS.txt", sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
fwrite(grs_rsIDs.revised, "MEGA_041822_rsIDs_GRSrevised_v0.1_JRS.txt", sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
grs_rsIDs.orig$GRS_rsID %>% paste(collapse = ",")
grs_rsIDs.revised$GRS_rsID %>% paste(collapse = ",")
https://pubs.broadinstitute.org/mammals/haploreg/haploreg.php https://grasp.nhlbi.nih.gov/Search.aspx
query.GRSorig <- c("rs1049124","rs7775228","rs2187668","rs1617322","rs73405471","rs79550239","rs7454108","rs3957146","rs10800746","rs12068671","rs12142280",
"rs1359062","rs4445406","rs10167650","rs1018326","rs13003464","rs1980422","rs6715106","rs990171","rs1353248","rs2030519","rs2561288",
"rs61579022","rs7616215","rs13132308","rs1050976","rs1611710","rs17264332","rs182429","rs2301226","rs3128927","rs4143332","rs55743914",
"rs6937061","rs1250552","rs2387397","rs10892258","rs61907765","rs7104791","rs3184504","rs11851414","rs1378938","rs6498114","rs11875687",
"rs1893592","rs4821124")
query.GRSrevised <- c("rs1049124","rs2187668","rs73405471","rs9275437","rs4143332","rs3128927","rs2301226","rs6937061","rs1611710","rs13132308","rs2030519",
"rs1323292","rs17264332","rs6715106","rs55743914","rs990171","rs1980422","rs3184504","rs61907765","rs13003464","rs11875687","rs12068671",
"rs1250552","rs10892258","rs1018326","rs182429","rs7104791","rs4821124","rs4445406","rs11801183","rs6498114","rs1353248","rs947474",
"rs1893592","rs11851414","rs1378938","rs10800746","rs1050976","rs7616215","rs2561288","rs61579022","rs10167650")
query.GRSorig
query.GRSrevised
#install.packages("haploR", dependencies = TRUE)
library(haploR)
queryHaploReg.GRSorig <- queryHaploreg(query = query.GRSorig, url = "https://pubs.broadinstitute.org/mammals/haploreg/haploreg.php")
queryHaploReg.GRSrevised <- queryHaploreg(query = query.GRSrevised, url = "https://pubs.broadinstitute.org/mammals/haploreg/haploreg.php")
queryHaploReg.GRSorig
queryHaploReg.GRSrevised
setwd(dir.GRSanno)
fwrite(queryHaploReg.GRSorig, "041422_HaploReg_GRSoriginal_v0.1_JRS.txt")
fwrite(queryHaploReg.GRSrevised, "041422_HaploReg_GRSrevised_v0.1_JRS.txt")
pwd
wget 'https://pubs.broadinstitute.org/mammals/haploreg/haploreg.php'
library(XML)
library(httr)
theurl <- 'https://pubs.broadinstitute.org/mammals/haploreg/haploreg.php'
doc <- htmlParse(GET(theurl, user_agent("Chrome")))
results <- xpathSApply(doc, "//*/table[@id='table_results_r_1']")
results <- readHTMLTable(results[[1]])
rm(doc)
library(XML)
library(RCurl)
library(rlist)
theurl <- getURL("https://pubs.broadinstitute.org/mammals/haploreg/haploreg.php",.opts = list(ssl.verifypeer = FALSE) )
tables <- readHTMLTable(theurl)
tables <- list.clean(tables, fun = is.null, recursive = FALSE)
n.rows <- unlist(lapply(tables, function(t) dim(t)[1]))
tables
setwd("/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation")
haploreg.GRSorig<- fread("041422_HaploReg_GRSoriginal_v0.1_JRS.txt", fill = TRUE)
haploreg.GRSrevised <- fread("041422_HaploReg_GRSrevised_v0.1_JRS.txt", fill = TRUE)
GRASP.GRSorig <- read.xlsx("041422_GRASPsearch_GRSoriginal_v0.1_JRS.xlsx")
GRASP.GRSrevised <- read.xlsx("041422_GRASPsearch_GRSrevised_v0.1_JRS.xlsx")
test <- fread("/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Annotation/041422_HaploReg_GRSoriginal_DQB1_v0.1_JRS.txt", sep = "\t", fill = TRUE)
test
haploreg.GRSorig
haploreg.GRSrevised
GRASP.GRSorig
GRASP.GRSrevised
plotData.INFO.revisedGRS %>%
filter(INFO.EmpRsq != "-")
names(imputation.info)[[1]]
imputation.info[[1]]
names(snp_list.split)[[1]]
snp_list.split[[1]]
names(snp_list.split) %>% length()
imputation.info %>% length()
#imputation.info.GRS <- list()
#for ( i in 1:c(seq(1:20),22)) {
#imputation.info.GRS[[i]] <- imputation.info$`1` %>% filter(SNP %in% snp_list.split$`1`$SNP)
snp_list.split %>% names() %>% paste(collapse = ",")
temp <- c(1,2,3,4,6,10,11,12,14,15,16,18,22)
imputation.info.GRS[[1]] <- imputation.info$`1` %>% filter(SNP %in% snp_list.split$`1`$SNP)
temp <- c(1,2,3,4,6,10,11,12,14,15,16,18,22)
imputation.info.GRS <- list()
for ( i in length(temp)) {
chr.now <- temp[[i]]
imputation.info.GRS[[i]] <- imputation.info[[chr.now]] %>% filter(SNP %in% snp_list.split[[chr.now]]$SNP)
}
imputation.info.GRS %>% rbindlist() %>% dim()
imputation.info.GRS %>% lapply(as.data.frame) %>% lapply(dim)
setwd(dir.Data)
info.GRSsnps <- fread("/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN/autosomesNoChr21.info.gz", sep = "\t")
info.GRSsnps
info <- fread("/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN/autosomesNoChr21.info.gz")
info
to_grep <- imputed.traw01 %>% select(SNP) %>% unique()
setwd(dir.Data)
fwrite(to_grep, "MEGA_041822_Imputed_VariantIDs_OrigGRS_RevisedGRS_v0.1_JRS.txt", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
zcat < '/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN/autosomesNoChr21.info.gz' | awk 'NR==FNR{_[$1];next}!($1 in _)' 'MEGA_041822_Imputed_VariantIDs_OrigGRS_RevisedGRS_v0.1_JRS.txt' - > 'MEGA_041822_SNPs_OrigGRS_RevisedGRS_v0.1_JRS.info.txt'
cd '/Users/shawjes/Dropbox/EspinosaGroup/ANALYSIS/Celiac_MultiOmics/GRS/DSMIG_Shared/Manuscript_Figure1/Data'
zcat < '/Users/shawjes/Dropbox/EspinosaGroup/DATA_MAIN/MEGA/Imputation_Ref1000G3v5/Out/Imputation_Results_CLEAN/autosomesNoChr21.info.gz' | grep -f 'MEGA_041822_Imputed_VariantIDs_OrigGRS_RevisedGRS_v0.1_JRS.txt' > 'MEGA_041822_SNPs_OrigGRS_RevisedGRS_v0.1_JRS.info'
setwd(dir.Data)
GRSsnps.info <- fread("MEGA_041822_SNPs_OrigGRS_RevisedGRS_v0.1_JRS.info")
GRSsnps.info
setwd(dir.ImputationResultsRAW)
imputation.info <- list.files(pattern = "info") %>%
lapply(fread) %>%
rbindlist()
imputation.info
Code reference: https://stackoverflow.com/questions/19380925/grep-a-large-list-against-a-large-file grep -f the_ids.txt huge.csv -F, –fixed-strings Interpret PATTERN as a list of fixed strings, separated by newlines, any of which is to be matched. (-F is specified by POSIX.)
imputed.dosage.OrigGRS.nonHLADQ %>%
select(-c(IID, Dosage.COUNTED)) %>%
unique() %>%
select(SNP) %>%
left_join(imputation.info, by = "SNP")
imputed.dosage.RevisedGRS %>%
select(-c(IID, Dosage.COUNTED)) %>%
unique() %>%
select(SNP)
imputation.info